## Replication Code for "How the Clustering of Young and Highly-Educated Voters Undermines Redistributive Politics"

#####################################################################################################
## NOTE: We used R version 4.1.3. With this version of R, running brm models required us to first... ##

# i. Install Rtools from https://cran.r-project.org/bin/windows/Rtools/rtools43/rtools.html

# ii. Remove prior versions of StanHeaders and rstan, then install latest versions using:
remove.packages(c("StanHeaders","rstan"))
install.packages("StanHeaders", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))

  ## (You can skip this if you already have a working rstan setup) ##
######################################################################################################

library(psych)
library(tidyverse)
library(corrplot)
library(factoextra)
library(brms)
library(stringr)
library(data.table)
library(stargazer)
library(matrixStats)
library(zoo)
library(reshape2)
library(readstata13)
library(readxl)
library(sf)
library(BBmisc)
library(viridis)
library(ggpubr)
library(gridExtra)
library(grid)
library(lme4)

### Set rep.dir to the path where the replication folder is located

rep.dir <- "[YOUR WORKING DIRECTORY]"  #main folder for replication

data.dir <- paste0(rep.dir, "/data/")
plot.dir <- paste0(rep.dir, "/figures/")
output.dir <- paste0(rep.dir, "/output/")


####################################################
## 1. OVER-TIME PATTERNS BY GROUP (BSAS, FIGURE 1)
####################################################

b <- read.dta13(paste0(data.dir,"bsa8320.dta"))

# Coding variables

# Redist [higher = more in favor]
b$redist=NA
b$redist[which(as.numeric(b$redistrb)==3)] <- 5
b$redist[which(as.numeric(b$redistrb)==4)] <- 4
b$redist[which(as.numeric(b$redistrb)==5)] <- 3
b$redist[which(as.numeric(b$redistrb)==6)] <- 2
b$redist[which(as.numeric(b$redistrb)==7)] <- 1

# Shd spend more on bens [higher = spend more]
b$spendmore <- rep(NA, length(b$morewelf))
b$spendmore[which(as.numeric(b$morewelf)==5)] <- 1
b$spendmore[which(as.numeric(b$morewelf)==4)] <- 2
b$spendmore[which(as.numeric(b$morewelf)==3)] <- 3
b$spendmore[which(as.numeric(b$morewelf)==2)] <- 4
b$spendmore[which(as.numeric(b$morewelf)==1)] <- 5

# people on bens don't really deserve help [higher = agree more]
b$deserve <- rep(NA, length(b$sochelp))
b$deserve[which(as.numeric(b$sochelp)==5)] <- 1
b$deserve[which(as.numeric(b$sochelp)==4)] <- 2
b$deserve[which(as.numeric(b$sochelp)==3)] <- 3
b$deserve[which(as.numeric(b$sochelp)==2)] <- 4
b$deserve[which(as.numeric(b$sochelp)==1)] <- 5

# prevent standing on own two feet [higher = agree more]
b$feet <- rep(NA, length(b$welffeet))
b$feet[which(as.numeric(b$welffeet)==5)] <- 1
b$feet[which(as.numeric(b$welffeet)==4)] <- 2
b$feet[which(as.numeric(b$welffeet)==3)] <- 3
b$feet[which(as.numeric(b$welffeet)==2)] <- 4
b$feet[which(as.numeric(b$welffeet)==1)] <- 5

# fiddling one way or another [higher = agree more]
b$fiddle <- rep(NA, length(b$dolefidl))
b$fiddle[which(as.numeric(b$dolefidl)==5)] <- 1
b$fiddle[which(as.numeric(b$dolefidl)==4)] <- 2
b$fiddle[which(as.numeric(b$dolefidl)==3)] <- 3
b$fiddle[which(as.numeric(b$dolefidl)==2)] <- 4
b$fiddle[which(as.numeric(b$dolefidl)==1)] <- 5


ryears = c(1986,1987,1989,1990,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
           2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020)

wfyears = c(1987,1989,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
            2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020)

wyears = c(1987,1989,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
           2002,2003,2004,2005,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020)

ryears_educ = c(1986,1987,1989,1990,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
                2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)

wfyears_educ = c(1987,1989,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
                 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)

wyears_educ = c(1987,1989,1991,1993,1994,1995,1996,1998,1999,2000,2001, 
                2002,2003,2004,2005,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)


# Education (fig 1a)

# degree education
b$hedqual1 <- as.numeric(b$hedqual)
b$degree <- rep(NA, length(b[,1]))
b$degree[which(b$hedqual1==1)] <- 1

# higher education below degree
b$highed <- rep(NA, length(b[,1]))
b$highed[which(b$hedqual1==3)] <- 1

# completed secondary school or below
b$school <- rep(NA, length(b[,1]))
b$school[which(b$hedqual1==3|b$hedqual1==4|b$hedqual1==5|b$hedqual1==7)] <- 1


## redist, educ
r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears_educ)){
  b2=subset(b1,b1$year==ryears_educ[i])
  r[i]=(mean(b2$redist[b2$school==1],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:31])
redist.la <- na.approx(redist)
redist.la.sch <- c(NA,redist.la)

r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears_educ)){
  b2=subset(b1,b1$year==ryears_educ[i])
  r[i]=(mean(b2$redist[b2$highed==1],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:31])
redist.la <- na.approx(redist)
redist.la.he <- c(NA,redist.la)

r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears_educ)){
  b2=subset(b1,b1$year==ryears_educ[i])
  r[i]=(mean(b2$redist[b2$degree==1],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:31])
redist.la <- na.approx(redist)
redist.la.deg <- c(NA,redist.la)


## Own two feet, educ
wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  wf[i]=(mean(b2$feet[b2$school==1],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:31])
onfeet.la <- na.approx(onfeet)
onfeet.la.sch <- c(NA,NA,onfeet.la)

wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  wf[i]=(mean(b2$feet[b2$highed==1],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:31])
onfeet.la <- na.approx(onfeet)
onfeet.la.he <- c(NA,NA,onfeet.la)

wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  wf[i]=(mean(b2$feet[b2$degree==1],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:31])
onfeet.la <- na.approx(onfeet)
onfeet.la.deg <- c(NA,NA,onfeet.la)


## Fiddling, educ
fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  fi[i]=(mean(b2$fiddle[b2$school==1],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:29])
fiddling.la <- na.approx(fiddling)
fiddling.la.sch <- c(NA,NA,fiddling.la)

fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  fi[i]=(mean(b2$fiddle[b2$highed==1],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:29])
fiddling.la <- na.approx(fiddling)
fiddling.la.he <- c(NA,NA,fiddling.la)


fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears_educ)){
  b2=subset(b1,b1$year==wfyears_educ[i])
  fi[i]=(mean(b2$fiddle[b2$degree==1],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:29])
fiddling.la <- na.approx(fiddling)
fiddling.la.deg <- c(NA,NA,fiddling.la)


## Deserving, educ 
s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  s[i]=(mean(b2$deserve[b2$school==1],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:28])
sochelp.la <- na.approx(sochelp)
sochelp.la.sch <- c(NA,NA,sochelp.la)

s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  s[i]=(mean(b2$deserve[b2$highed==1],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:28])
sochelp.la <- na.approx(sochelp)
sochelp.la.he <- c(NA,NA,sochelp.la)

s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  s[i]=(mean(b2$deserve[b2$degree==1],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:28])
sochelp.la <- na.approx(sochelp)
sochelp.la.deg <- c(NA,NA,sochelp.la)


## Spend more, educ  
w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  w[i]=(mean(b2$spendmore[b2$school==1],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:28])
spendmore.la <- na.approx(spendmore)
spendmore.la.sch <- c(NA,NA,spendmore.la)

w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  w[i]=(mean(b2$spendmore[b2$highed==1],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:28])
spendmore.la <- na.approx(spendmore)
spendmore.la.he <- c(NA,NA,spendmore.la)

w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears_educ)){
  b2=subset(b1,b1$year==wyears_educ[i])
  w[i]=(mean(b2$spendmore[b2$degree==1],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:28])
spendmore.la <- na.approx(spendmore)
spendmore.la.deg <- c(NA,NA,spendmore.la)


ed_data <- as.data.frame(cbind(
  c(redist.la.deg,redist.la.he,redist.la.sch,onfeet.la.deg,onfeet.la.he,onfeet.la.sch,
    fiddling.la.deg,fiddling.la.he,fiddling.la.sch,sochelp.la.deg,sochelp.la.he,sochelp.la.sch,
    spendmore.la.deg,spendmore.la.he,spendmore.la.sch),
  c(rep(c(1985:2019),15)), 
  c(rep(c(rep("Degree",35),rep("Higher Education,\nBelow Degree",35),rep("Secondary School\nor below",35)),5)),  #check 
  c(rep("Government should redistribute\n income from the rich to the poor",105),
    rep("Benefits stop people standing on\n their own two feet",105),
    rep("Most people on the dole are\n fiddling one way or another",105),
    rep("Most benefits recipients don't\n deserve help",105),
    rep("Government should spend more money\n on welfare benefits for the poor,\neven if it leads to higher taxes",105))
))
colnames(ed_data) <- c("numbers","year","education","variable")
ed_data$numbers <- as.numeric(ed_data$numbers)
ed_data$year <- as.numeric(ed_data$year)


# plot
pdf(file = paste0(plot.dir, "fig1a.pdf"), height=4.5,width=7.5)
ggplot(ed_data, aes(x=year,linetype=education)) +
  geom_line(aes(y=numbers)) +
  scale_linetype_manual(values=c("solid", "dotted","dashed")) +
  facet_wrap(~variable) +
  theme_bw() +
  scale_y_continuous(name="Mean Response (higher = agree more)") +
  scale_x_continuous(name="") +
  theme(legend.position=c(.85,.25),legend.title=element_blank(), 
        legend.background = element_blank(),
        panel.grid.minor = element_blank(),
        legend.box.background = element_rect(colour = "black")) +
  guides(linetype=guide_legend(ncol=1)) +
  theme(strip.text = element_text(size=9))
dev.off()

rm(redist.la.deg,redist.la.he,redist.la.sch,onfeet.la.deg,onfeet.la.he,onfeet.la.sch,
   fiddling.la.deg,fiddling.la.he,fiddling.la.sch,sochelp.la.deg,sochelp.la.he,sochelp.la.sch,
   spendmore.la.deg,spendmore.la.he,spendmore.la.sch,redist,onfeet,fiddling,sochelp,spendmore,fi,r,s,w,wf,
   fiddling.la,onfeet.la,redist.la,sochelp.la,spendmore.la,b2,i,ryears_educ,wyears_educ,wfyears_educ)


## Age (fig 1b)  

## redist, age
r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears)){
  b2=subset(b1,b1$year==ryears[i])
  r[i]=(mean(b2$redist[b2$ragecat==1],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:32])
redist.la <- na.approx(redist)
redist.la.1824 <- c(NA,redist.la)

r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears)){
  b2=subset(b1,b1$year==ryears[i])
  r[i]=(mean(b2$redist[b2$ragecat==4],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:32])
redist.la <- na.approx(redist)
redist.la.4554 <- c(NA,redist.la)

r=c()
b1 <- b[!is.na(b$redist),]
for(i in 1:length(ryears)){
  b2=subset(b1,b1$year==ryears[i])
  r[i]=(mean(b2$redist[b2$ragecat==7],na.rm=T))
}
redist <- c(r[1:2],NA,r[3:5],NA,r[6:9],NA,r[10:32])
redist.la <- na.approx(redist)
redist.la.65p <- c(NA,redist.la)




## Own two feet, age
wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  wf[i]=(mean(b2$feet[b2$ragecat==1],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:32])
onfeet.la <- na.approx(onfeet)
onfeet.la.1824 <- c(NA,NA,onfeet.la)


wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  wf[i]=(mean(b2$feet[b2$ragecat==4],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:32])
onfeet.la <- na.approx(onfeet)
onfeet.la.4554 <- c(NA,NA,onfeet.la)

wf=c()
b1 <- b[!is.na(b$feet),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  wf[i]=(mean(b2$feet[b2$ragecat==7],na.rm=T))
}
onfeet <- c(wf[1],NA,wf[2],NA,wf[3],NA,wf[4:7],NA,wf[8:32])
onfeet.la <- na.approx(onfeet)
onfeet.la.65p <- c(NA,NA,onfeet.la)



## Fiddling, age
fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  fi[i]=(mean(b2$fiddle[b2$ragecat==1],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:30])
fiddling.la <- na.approx(fiddling)
fiddling.la.1824 <- c(NA,NA,fiddling.la)


fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  fi[i]=(mean(b2$fiddle[b2$ragecat==4],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:30])
fiddling.la <- na.approx(fiddling)
fiddling.la.4554 <- c(NA,NA,fiddling.la)

fi=c()
b1 <- b[!is.na(b$fiddle),]
for(i in 1:length(wfyears)){
  b2=subset(b1,b1$year==wfyears[i])
  fi[i]=(mean(b2$fiddle[b2$ragecat==7],na.rm=T))
}

fiddling <- c(fi[1],NA,fi[2],NA,fi[3],NA,fi[4:7],NA,fi[8:30])
fiddling.la <- na.approx(fiddling)
fiddling.la.65p <- c(NA,NA,fiddling.la)



## Deserving, age 
s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  s[i]=(mean(b2$deserve[b2$ragecat==1],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:29])
sochelp.la <- na.approx(sochelp)
sochelp.la.1824 <- c(NA,NA,sochelp.la)


s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  s[i]=(mean(b2$deserve[b2$ragecat==4],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:29])
sochelp.la <- na.approx(sochelp)
sochelp.la.4554 <- c(NA,NA,sochelp.la)

s=c()
b1 <- b[!is.na(b$deserve),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  s[i]=(mean(b2$deserve[b2$ragecat==7],na.rm=T))
}
sochelp <- c(s[1],NA,s[2],NA,s[3],NA,s[4:7],NA,s[8:15],NA,s[16:29])
sochelp.la <- na.approx(sochelp)
sochelp.la.65p <- c(NA,NA,sochelp.la)


## Spend more, age  
w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  w[i]=(mean(b2$spendmore[b2$ragecat==1],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:29])
spendmore.la <- na.approx(spendmore)
spendmore.la.1824 <- c(NA,NA,spendmore.la)


w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  w[i]=(mean(b2$spendmore[b2$ragecat==4],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:29])
spendmore.la <- na.approx(spendmore)
spendmore.la.4554 <- c(NA,NA,spendmore.la)

w=c()
b1 <- b[!is.na(b$spendmore),]
for(i in 1:length(wyears)){
  b2=subset(b1,b1$year==wyears[i])
  w[i]=(mean(b2$spendmore[b2$ragecat==7],na.rm=T))
}
spendmore <- c(w[1],NA,w[2],NA,w[3],NA,w[4:7],NA,w[8:15],NA,w[16:29])
spendmore.la <- na.approx(spendmore)
spendmore.la.65p <- c(NA,NA,spendmore.la)


## plot, age
age_data <- as.data.frame(cbind(
  c(redist.la.65p,redist.la.4554,redist.la.1824,onfeet.la.65p,onfeet.la.4554,onfeet.la.1824,
    fiddling.la.65p,fiddling.la.4554,fiddling.la.1824,sochelp.la.65p,sochelp.la.4554,sochelp.la.1824,
    spendmore.la.65p,spendmore.la.4554,spendmore.la.1824),
  c(rep(c(1985:2020),15)),  #check
  c(rep(c(rep("Aged 65+",36),rep("Aged 45-54",36),rep("Aged 18-24",36)),5)),  
  c(rep("Government should redistribute\n income from the rich to the poor",108),
    rep("Benefits stop people standing on\n their own two feet",108),
    rep("Most people on the dole are\n fiddling one way or another",108),
    rep("Most benefits recipients don't\n deserve help",108),
    rep("Government should spend more money\n on welfare benefits for the poor,\neven if it leads to higher taxes",108))
))
colnames(age_data) <- c("numbers","year","age","variable")
age_data$numbers <- as.numeric(age_data$numbers)
age_data$year <- as.numeric(age_data$year)


pdf(file = paste0(plot.dir, "fig1b.pdf"), height=4.5,width=7.5)
ggplot(age_data, aes(x=year,linetype=age)) +
  geom_line(aes(y=numbers)) +
  scale_linetype_manual(values=c("solid","dashed","dotted")) +
  facet_wrap(~variable) +
  theme_bw() +
  scale_y_continuous(name="Mean Response (higher = agree more)",limits=c(2.05,3.75)) +
  scale_x_continuous(name="") +
  theme(legend.position=c(.85,.25),legend.title=element_blank(),
        legend.background = element_blank(),
        panel.grid.minor = element_blank(),
        legend.box.background = element_rect(colour = "black")) +
  guides(linetype=guide_legend(ncol=1)) +
  theme(strip.text = element_text(size=9))
dev.off()

rm(redist.la.1824,redist.la.4554,redist.la.65p,onfeet.la.1824,onfeet.la.4554,onfeet.la.65p,
   fiddling.la.1824,fiddling.la.4554,fiddling.la.65p,sochelp.la.1824,sochelp.la.4554,sochelp.la.65p,
   spendmore.la.1824,spendmore.la.4554,spendmore.la.65p,redist,onfeet,fiddling,sochelp,spendmore,fi,r,s,w,wf,
   fiddling.la,onfeet.la,redist.la,sochelp.la,spendmore.la,b2,i,ryears,wfyears,wyears,
   age_data,b,b1,ed_data)



#######################################################
## 2. CROSS-COUNTRY PATTERNS BY GROUP (ISSP, FIGURE 2)
#######################################################
# Inequality, 2019  
e <- read.dta13(paste0(data.dir,"isspineq19.dta"),convert.factors=F)

e <- e[e$country==36|e$country==40|e$country==203|e$country==208|e$country==246|e$country==250|e$country==276|e$country==380|
         e$country==392|e$country==440|e$country==554|e$country==578|e$country==705|e$country==752|e$country==756|e$country==826|e$country==840,]

# govt responsibility to redistribute [higher = more agree]
e$redist <- NA
e$redist[e$v22==1] <- 5
e$redist[e$v22==2] <- 4
e$redist[e$v22==3] <- 3
e$redist[e$v22==4] <- 2
e$redist[e$v22==5] <- 1

# govt responsibility for unemployed  [higher = more agree]
e$unemp <- NA
e$unemp[e$v23==1] <- 5
e$unemp[e$v23==2] <- 4
e$unemp[e$v23==3] <- 3
e$unemp[e$v23==4] <- 2
e$unemp[e$v23==5] <- 1

## Education
e$DEGREE[e$DEGREE<0] <- NA
e$degree <- ifelse(e$DEGREE>4,1,0)
e$highed <- ifelse(e$DEGREE==4,1,0)
e$school <- ifelse(e$DEGREE>(-1) & e$DEGREE<4,1,0)

cnames <- c("Australia", "Austria", "Czech Republic", "Denmark", "Finland", "France", 
            "Germany", "Italy","Japan","Lithuania", "New Zealand", "Norway", "Slovenia", "Sweden", "Switzerland", 
            "United Kingdom", "United States")

dset <- matrix(NA,nrow=length(unique(e$country)),ncol=4)
for(i in 1:length(unique(e$country))){
  diff <- t.test(e$redist[e$degree==1 & e$country==sort(unique(e$country))[i]] , 
                 e$redist[e$school==1 & e$country==sort(unique(e$country))[i]])
  dset[i,] <- c(diff$estimate[1] - diff$estimate[2], diff$conf.int,cnames[i])
}              
dset <- as.data.frame(dset)


colnames(dset) <- c("diff","lower","upper","country")
dset$diff <- as.numeric(dset$diff)
dset$lower <- as.numeric(dset$lower)
dset$upper <- as.numeric(dset$upper)
dset <- dset[order(dset$diff,decreasing=T),]



## ENVIRONMENT, 2020 ##
env <- read.dta13(paste0(data.dir,"isspenv20.dta"),convert.factors=F)

env <- env[env$country==40|env$country==208|env$country==246|env$country==276|env$country==392|
             env$country==554|env$country==705|env$country==756,]

# govt responsibility to redistribute [higher = more agree]
env$redist <- NA
env$redist[env$v22==1] <- 5
env$redist[env$v22==2] <- 4
env$redist[env$v22==3] <- 3
env$redist[env$v22==4] <- 2
env$redist[env$v22==5] <- 1

## Education
env$EDULEVEL[env$DEGREE<0] <- NA
env$degree <- ifelse(env$EDULEVEL>4,1,0)
env$school <- ifelse(env$EDULEVEL>(-1) & env$EDULEVEL<4,1,0)

cnames <- c("Austria", "Denmark", "Finland", "Germany", "Japan", "New Zealand","Slovenia","Switzerland")

dset_env <- matrix(NA,nrow=length(unique(env$country)),ncol=4)
for(i in 1:length(unique(env$country))){
  diff <- t.test(env$redist[env$degree==1 & env$country==sort(unique(env$country))[i]] , 
                 env$redist[env$school==1 & env$country==sort(unique(env$country))[i]])
  dset_env[i,] <- c(diff$estimate[1] - diff$estimate[2], diff$conf.int,cnames[i])
}              
dset_env <- as.data.frame(dset_env)

dset_env <- rbind(dset_env,
                  c(as.data.frame(rbind(
                    c(NA,NA,NA,"United States"),
                    c(NA,NA,NA,"United Kingdom"),
                    c(NA,NA,NA,"Norway"),
                    c(NA,NA,NA,"Lithuania"),
                    c(NA,NA,NA,"Italy"),
                    c(NA,NA,NA,"Sweden"),
                    c(NA,NA,NA,"France"),
                    c(NA,NA,NA,"Czech Republic"),
                    c(NA,NA,NA,"Australia")
                  ))))


colnames(dset_env) <- c("diff","lower","upper","country")
dset_env$diff <- as.numeric(dset_env$diff)
dset_env$lower <- as.numeric(dset_env$lower)
dset_env$upper <- as.numeric(dset_env$upper)

dset_env$toorder <- rep(NA,length(dset_env[,1]))
for(i in 1:17){
  dset_env$toorder[i] <- dset$diff[which(dset$country==dset_env$country[i])]
}
dset_env <- dset_env[order(dset_env$toorder,decreasing=T),]
dset_env <- dset_env[,1:4]

# latest of each dataset
educ_both <- cbind(dset,dset_env[,1:3])
colnames(educ_both)[5:7] <- c("diff_comb","lower_comb","upper_comb")

for(i in 1:length(educ_both$diff_comb)){
  if(is.na(educ_both$diff_comb[i])){
    educ_both$diff_comb[i] <- educ_both$diff[i]
    educ_both$lower_comb[i] <- educ_both$lower[i]
    educ_both$upper_comb[i] <- educ_both$upper[i]
  }
} 

educ_both <- educ_both[order(educ_both$diff_comb,decreasing=T),]
educ_both <- educ_both[,4:7]
educ_both[,5] <- educ_both$country
educ_both <- educ_both[,2:5]
colnames(educ_both) <- c("diff","lower","upper","country")

# now add in places that appeared in 2019
educ_2019pan <- dset

for(i in 1:length(educ_2019pan$diff)){
  if(educ_2019pan$country[i] %in% c("United States","Australia","United Kingdom","Norway","Lithuania",
                                    "Italy","Sweden","France","Czech Republic")){
    educ_2019pan$diff[i] <- NA
    educ_2019pan$lower[i] <- NA
    educ_2019pan$upper[i] <- NA
  }
} 

educ_2019pan$toorder <- rep(NA,length(educ_2019pan[,1]))
for(i in 1:17){
  educ_2019pan$toorder[i] <- educ_both$diff[which(educ_both$country==educ_2019pan$country[i])]
}

educ_2019pan <- educ_2019pan[order(educ_2019pan$toorder,decreasing=T),]
educ_2019pan <- educ_2019pan[,1:4]

educ_both <- rbind(educ_both,educ_2019pan)
educ_both$plotorder <- rep(c(rev(letters[1:17])),2)
educ_both$country <- as.character(educ_both$country)
educ_both$survey <- c(rep("latest of 2019 or 2020",17),rep("2019",17))

educ_both$var <- c(rev(letters[1:17]))
educ_both$survey1 <- c("2019","2019","2020","2019","2020","2020","2019","2020","2020","2020","2020","2019","2019","2019","2019","2020","2019",
                       "2020","2020","2019","2020","2019","2019","2020","2019","2019","2019","2019","2020","2020","2020","2020","2019","2020")

pdf(file = paste0(plot.dir, "fig2.pdf"),height=3.5,width=5)
ggplot(data = educ_both, aes(x = var, y = diff, ymin = lower, ymax = upper, colour=survey1, shape=survey1, fill=survey1)) +
  geom_hline(aes(yintercept=0),size=0.5,linetype="dashed",colour="gray25") +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0, size=0.5, alpha=0.5) +
  geom_point(position = position_dodge(width = 0.5),size=2.4) +
  theme_bw() +
  coord_flip() +
  scale_x_discrete(name="",labels=rev(educ_both$country)) +
  scale_y_continuous(name="Difference between Degree Holders and School Leavers", limits=c(-0.8,0.6)) +
  theme(legend.position = c(0.85,0.15), legend.title = element_blank(), panel.border = element_rect(color="gray"), 
        panel.grid = element_blank())
dev.off()

rm(diff,dset,dset_env,e,educ_2019pan,educ_both,env,cnames,i)


######################################################
## 3. READ IN DATA FOR MRP, PREPARE IT FOR ESTIMATION
######################################################

# BSAS 1996 and local authority covariates
bsas <- read.csv(paste0(data.dir,"bsas_harmon_final.csv"))
covars1996 <- read.csv(paste0(data.dir,"covars1996.csv"))

# recode some bsas names with 1996-8 mergers [which is how the covariates are recorded]
bsas$lad_past_nm_96 <- as.character(bsas$lad_past_nm) %>% str_replace("[.]","") %>% str_replace("city of ","")
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="boothferry"] <- "east riding of yorkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="holderness"] <- "east riding of yorkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="cleethorpes"] <- "north east lincolnshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="great grimsby"] <- "north east lincolnshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="rochester upon medway"] <- "medway"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="gillingham"] <- "medway"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="medina"] <- "isle of wight"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="newbury"] <- "west berkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="brighton"] <- "brighton and hove"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="hove"] <- "brighton and hove"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="woodspring"] <- "north somerset"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="montgomeryshire"] <- "powys"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="carmarthen"] <- "carmarthenshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="llanelli"] <- "carmarthenshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="dinefwr"] <- "carmarthenshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="rhymney valley"] <- "caerphilly"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="islwyn"] <- "caerphilly"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="alyn and deeside"] <- "flintshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="delyn"] <- "flintshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="dunfermline"] <- "fife"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="kirkcaldy"] <- "fife"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="north east fife"] <- "fife"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="ogwr"] <- "the vale of glamorgan"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="vale of glamorgan"] <- "the vale of glamorgan"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="ross and cromarty"] <- "highland"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="east kilbride"] <- "south lanarkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="hamilton"] <- "south lanarkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="strathkelvin"] <- "east dunbartonshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="motherwell"] <- "north lanarkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="cumbernauld and kilsyth"] <- "north lanarkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="monklands"] <- "north lanarkshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="ettrick and lauderdale"] <- "scottish borders"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="berwickshire"] <- "scottish borders"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="roxburgh"] <- "scottish borders"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="eastwood"] <- "east renfrewshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="renfrew"] <- "renfrewshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="kilmarnock and loudoun"] <- "east ayrshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="langbaurgh-on-tees"] <- "redcar and cleveland"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="kingswood"] <- "south gloucestershire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="arfon"] <- "gwynedd"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="dwyfor"] <- "gwynedd"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="aberconwy"] <- "gwynedd"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="south pembrokeshire"] <- "pembrokeshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="monmouth"] <- "monmouthshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="thamesdown"] <- "swindon"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="taff-ely"] <- "rhondda, cynon, taff"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="cynon valley"] <- "rhondda, cynon, taff"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="dumbarton"] <- "west dunbartonshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="kyle and carrick"] <- "south ayrshire"
bsas$lad_past_nm_96[bsas$lad_past_nm_96=="wrexham maelor"] <- "wrexham"
bsas <- bsas[!bsas$lad_past_nm_96=="pc.not match census",]

covars1996$lad96nm <- as.character(covars1996$lad96nm)
covars1996$lad96nm[184] <- "kingston upon hull"

# standardise local authority covariates
covars1996$la_hhinc <- log(covars1996$la_hhinc)
covars1996$la_socbensrd <- log(covars1996$la_socbensrd)
covars1996$la_houseprice <- log(covars1996$la_houseprice)
covars1996$la_hhinc <- (covars1996$la_hhinc - mean(covars1996$la_hhinc,na.rm=T))/sd(covars1996$la_hhinc,na.rm=T)
covars1996$la_socbensrd <- (covars1996$la_socbensrd - mean(covars1996$la_socbensrd,na.rm=T))/sd(covars1996$la_socbensrd,na.rm=T)
covars1996$la_claimants <- (covars1996$la_claimants - mean(covars1996$la_claimants,na.rm=T))/sd(covars1996$la_claimants,na.rm=T)
covars1996$la_houseprice <- (covars1996$la_houseprice - mean(covars1996$la_houseprice,na.rm=T))/sd(covars1996$la_houseprice,na.rm=T)
covars1996$la_pc1824 <- (covars1996$la_pc1824 - mean(covars1996$la_pc1824,na.rm=T))/sd(covars1996$la_pc1824,na.rm=T)
covars1996$la_pc65plus <- (covars1996$la_pc65plus - mean(covars1996$la_pc65plus,na.rm=T))/sd(covars1996$la_pc65plus,na.rm=T)
covars1996$la_popdensity <- (covars1996$la_popdensity - mean(covars1996$la_popdensity,na.rm=T))/sd(covars1996$la_popdensity,na.rm=T)
covars1996$la_gva <- (covars1996$la_gva- mean(covars1996$la_gva,na.rm=T))/sd(covars1996$la_gva,na.rm=T)
covars1996$la_degree <- (covars1996$la_degree- mean(covars1996$la_degree,na.rm=T))/sd(covars1996$la_degree,na.rm=T)

#join local authority covariates to bsas survey data 
bsas <- left_join(bsas, covars1996 %>% rename(lad_past_nm_96=lad96nm), by="lad_past_nm_96")

# recode so that higher = more conservative, sort out NAs, create outcome variables
bsas <- bsas %>%  mutate(redistrb = as.factor(redistrb),welffeet=as.factor(welffeet),
                         dolefidl=as.factor(dolefidl),morewelf=as.factor(morewelf),sochelp=as.factor(sochelp)) %>%
  mutate(redistrb = as.numeric(redistrb),welffeet=as.numeric(welffeet),
         dolefidl=as.numeric(dolefidl),morewelf=as.numeric(morewelf),sochelp=as.numeric(sochelp)) %>% 
  mutate(redistrb = recode(redistrb, `4` = 24), redistrb = recode(redistrb, `3` = 23),
         redistrb = recode(redistrb, `5` = 25), redistrb = recode(redistrb, `1` = 21),
         redistrb = recode(redistrb, `2` = 22)) %>%
  mutate(redistrb = recode(redistrb, `24` = 1), redistrb = recode(redistrb, `23` = 2),
         redistrb = recode(redistrb, `25` = 3), redistrb = recode(redistrb, `21` = 4),
         redistrb = recode(redistrb, `22` = 5)) %>%
  mutate(welffeet = na_if(welffeet, 6), welffeet = na_if(welffeet, 9)) %>%
  mutate(welffeet = recode(welffeet, `1` = 21), welffeet = recode(welffeet, `2` = 22),
         welffeet = recode(welffeet, `3` = 23), welffeet = recode(welffeet, `4` = 24),
         welffeet = recode(welffeet, `5` = 25), welffeet = recode(welffeet, `8` = 28),
         welffeet = recode(welffeet, `7` = 27)) %>%
  mutate(welffeet = recode(welffeet, `21` = 2), welffeet = recode(welffeet, `22` = 1),
         welffeet = recode(welffeet, `23` = 4), welffeet = recode(welffeet, `24` = 5),
         welffeet = recode(welffeet, `25` = 5), welffeet = recode(welffeet, `28` = 3),
         welffeet = recode(welffeet, `27` = 3)) %>%
  mutate(dolefidl = recode(dolefidl, `1` = 21), dolefidl = recode(dolefidl, `2` = 22),
         dolefidl = recode(dolefidl, `3` = 23), dolefidl = recode(dolefidl, `4` = 24),
         dolefidl = recode(dolefidl, `5` = 25), dolefidl = recode(dolefidl, `8` = 28)) %>%
  mutate(dolefidl = recode(dolefidl, `21` = 2), dolefidl = recode(dolefidl, `22` = 1),
         dolefidl = recode(dolefidl, `23` = 4), dolefidl = recode(dolefidl, `24` = 5),
         dolefidl = recode(dolefidl, `25` = 3)) %>%
  mutate(morewelf = recode(morewelf, `1` = 21), morewelf = recode(morewelf, `2` = 22),
         morewelf = recode(morewelf, `3` = 23), morewelf = recode(morewelf, `4` = 24),
         morewelf = recode(morewelf, `5` = 25)) %>%
  mutate(morewelf = recode(morewelf, `21` = 4), morewelf = recode(morewelf, `22` = 5),
         morewelf = recode(morewelf, `23` = 2), morewelf = recode(morewelf, `24` = 1),
         morewelf = recode(morewelf, `25` = 3)) %>%
  mutate(sochelp = recode(sochelp, `1` = 21), sochelp = recode(sochelp, `2` = 22),
         sochelp = recode(sochelp, `3` = 23), sochelp = recode(sochelp, `4` = 24),
         sochelp = recode(sochelp, `5` = 25)) %>%
  mutate(sochelp = recode(sochelp, `21` = 2), sochelp = recode(sochelp, `22` = 1),
         sochelp = recode(sochelp, `23` = 4), sochelp = recode(sochelp, `24` = 5),
         sochelp = recode(sochelp, `25` = 3))

bsas <- bsas %>% filter(complete.cases(welffeet,dolefidl,sochelp,redistrb,morewelf))
bsas$combo <- (bsas$welffeet + bsas$dolefidl + bsas$sochelp + bsas$redistrb + bsas$morewelf)/5
bsas$deserve <- (bsas$welffeet + bsas$dolefidl + bsas$sochelp)/3
bsas$redist <- (bsas$redistrb + bsas$morewelf)/2

# figures quoted in the paper for means
summary(bsas$combo)
summary(bsas$deserve)
summary(bsas$redist)

# code categories for mrp estimation
bsas$female <- ifelse(bsas$rsex=="female",1,0)
bsas$rage[bsas$rage==99] <- NA
bsas$age1819 <- ifelse(bsas$rage==18|bsas$rage==19,1,0)
bsas$age2029 <- ifelse(bsas$rage>19 & bsas$rage<30,1,0)*2
bsas$age3039 <- ifelse(bsas$rage>29 & bsas$rage<40,1,0)*3
bsas$age4049 <- ifelse(bsas$rage>39 & bsas$rage<50,1,0)*4
bsas$age5059 <- ifelse(bsas$rage>49 & bsas$rage<60,1,0)*5
bsas$age6069 <- ifelse(bsas$rage>59 & bsas$rage<70,1,0)*6
bsas$age7079 <- ifelse(bsas$rage>69 & bsas$rage<80,1,0)*7
bsas$age80pl <- ifelse(bsas$rage>79,1,0)*8
bsas <- bsas %>% mutate(age_cat = age1819+age2029+age3039+age4049+
                          age5059+age6069+age7079+age80pl)


# YOUGOV 2020 and local authority covariates
yougov <- read.csv(paste0(data.dir,"yougov_harmon_final.csv"))
covars2020 <- read.csv(paste0(data.dir,"covars2020.csv")) %>% select(-rgn19nm)


# standardise local authority covariates
covars2020$la_hhinc <- log(covars2020$la_hhinc)
covars2020$la_socbensrd <- log(covars2020$la_socbensrd)
covars2020$la_houseprice <- log(covars2020$la_houseprice)
covars2020$la_hhinc <- (covars2020$la_hhinc - mean(covars2020$la_hhinc,na.rm=T))/sd(covars2020$la_hhinc,na.rm=T)
covars2020$la_socbensrd <- (covars2020$la_socbensrd - mean(covars2020$la_socbensrd,na.rm=T))/sd(covars2020$la_socbensrd,na.rm=T)
covars2020$la_claimants <- (covars2020$la_claimants - mean(covars2020$la_claimants,na.rm=T))/sd(covars2020$la_claimants,na.rm=T)
covars2020$la_houseprice <- (covars2020$la_houseprice - mean(covars2020$la_houseprice,na.rm=T))/sd(covars2020$la_houseprice,na.rm=T)
covars2020$la_pc1824 <- (covars2020$la_pc1824 - mean(covars2020$la_pc1824,na.rm=T))/sd(covars2020$la_pc1824,na.rm=T)
covars2020$la_pc65plus <- (covars2020$la_pc65plus - mean(covars2020$la_pc65plus,na.rm=T))/sd(covars2020$la_pc65plus,na.rm=T)
covars2020$la_popdensity <- (covars2020$la_popdensity - mean(covars2020$la_popdensity,na.rm=T))/sd(covars2020$la_popdensity,na.rm=T)
covars2020$la_gva <- (covars2020$la_gva- mean(covars2020$la_gva,na.rm=T))/sd(covars2020$la_gva,na.rm=T)
covars2020$la_degree <- (covars2020$la_prop_degree- mean(covars2020$la_prop_degree,na.rm=T))/sd(covars2020$la_prop_degree,na.rm=T)


# covars are recorded after the merger of four LAs into buckinghamshire
yougov$lad19nm[yougov$lad19nm=="chiltern"] <- "buckinghamshire"
yougov$lad19nm[yougov$lad19nm=="aylesbury vale"] <- "buckinghamshire"
yougov$lad19nm[yougov$lad19nm=="south bucks"] <- "buckinghamshire"
yougov$lad19nm[yougov$lad19nm=="wycombe"] <- "buckinghamshire"

#join local authority covariates to yougov survey data
yougov <- left_join(yougov,covars2020 , by="lad19nm")

# recode so that higher = more conservative, create outcome variables
yougov <- yougov %>%  mutate(redistrb = as.factor(redistrb),welffeet=as.factor(welffeet),
                             dolefidl=as.factor(dolefidl),morewelf=as.factor(morewelf),sochelp=as.factor(sochelp)) %>% 
  mutate(redistrb = as.numeric(redistrb),welffeet=as.numeric(welffeet),
         dolefidl=as.numeric(dolefidl),morewelf=as.numeric(morewelf),sochelp=as.numeric(sochelp)) %>% 
  mutate(redistrb = recode(redistrb, `4` = 24), redistrb = recode(redistrb, `3` = 23),
         redistrb = recode(redistrb, `5` = 25), redistrb = recode(redistrb, `1` = 21),
         redistrb = recode(redistrb, `2` = 22)) %>%
  mutate(redistrb = recode(redistrb, `24` = 1), redistrb = recode(redistrb, `23` = 2),
         redistrb = recode(redistrb, `25` = 3), redistrb = recode(redistrb, `21` = 4),
         redistrb = recode(redistrb, `22` = 5)) %>%
  mutate(welffeet = recode(welffeet, `1` = 21), welffeet = recode(welffeet, `2` = 22),
         welffeet = recode(welffeet, `3` = 23), welffeet = recode(welffeet, `4` = 24),
         welffeet = recode(welffeet, `5` = 25)) %>%
  mutate(welffeet = recode(welffeet, `21` = 2), welffeet = recode(welffeet, `22` = 1),
         welffeet = recode(welffeet, `23` = 4), welffeet = recode(welffeet, `24` = 5),
         welffeet = recode(welffeet, `25` = 3)) %>%
  mutate(dolefidl = recode(dolefidl, `1` = 21), dolefidl = recode(dolefidl, `2` = 22),
         dolefidl = recode(dolefidl, `3` = 23), dolefidl = recode(dolefidl, `4` = 24),
         dolefidl = recode(dolefidl, `5` = 25), dolefidl = recode(dolefidl, `8` = 28)) %>%
  mutate(dolefidl = recode(dolefidl, `21` = 2), dolefidl = recode(dolefidl, `22` = 1),
         dolefidl = recode(dolefidl, `23` = 4), dolefidl = recode(dolefidl, `24` = 5),
         dolefidl = recode(dolefidl, `25` = 3)) %>%
  mutate(morewelf = recode(morewelf, `1` = 21), morewelf = recode(morewelf, `2` = 22),
         morewelf = recode(morewelf, `3` = 23), morewelf = recode(morewelf, `4` = 24),
         morewelf = recode(morewelf, `5` = 25)) %>%
  mutate(morewelf = recode(morewelf, `21` = 4), morewelf = recode(morewelf, `22` = 5),
         morewelf = recode(morewelf, `23` = 2), morewelf = recode(morewelf, `24` = 1),
         morewelf = recode(morewelf, `25` = 3)) %>%
  mutate(sochelp = recode(sochelp, `1` = 21), sochelp = recode(sochelp, `2` = 22),
         sochelp = recode(sochelp, `3` = 23), sochelp = recode(sochelp, `4` = 24),
         sochelp = recode(sochelp, `5` = 25)) %>%
  mutate(sochelp = recode(sochelp, `21` = 2), sochelp = recode(sochelp, `22` = 1),
         sochelp = recode(sochelp, `23` = 4), sochelp = recode(sochelp, `24` = 5),
         sochelp = recode(sochelp, `25` = 3))

yougov <- yougov %>% filter(complete.cases(welffeet,dolefidl,sochelp,redistrb,morewelf))
yougov$combo <- (yougov$welffeet + yougov$dolefidl + yougov$sochelp + yougov$redistrb + yougov$morewelf)/5
yougov$deserve <- (yougov$welffeet + yougov$dolefidl + yougov$sochelp)/3
yougov$redist <- (yougov$redistrb + yougov$morewelf)/2

# figures quoted in the paper for means
summary(yougov$combo)
summary(yougov$deserve)
summary(yougov$redist)

# code categories for mrp estimation
yougov$female <- ifelse(yougov$rsex=="female",1,0)
yougov$age1819 <- ifelse(yougov$rage==18|yougov$rage==19,1,0)
yougov$age2029 <- ifelse(yougov$rage>19 & yougov$rage<30,1,0)*2
yougov$age3039 <- ifelse(yougov$rage>29 & yougov$rage<40,1,0)*3
yougov$age4049 <- ifelse(yougov$rage>39 & yougov$rage<50,1,0)*4
yougov$age5059 <- ifelse(yougov$rage>49 & yougov$rage<60,1,0)*5
yougov$age6069 <- ifelse(yougov$rage>59 & yougov$rage<70,1,0)*6
yougov$age7079 <- ifelse(yougov$rage>69 & yougov$rage<80,1,0)*7
yougov$age80pl <- ifelse(yougov$rage>79,1,0)*8
yougov <- yougov %>% mutate(age_cat = age1819+age2029+age3039+age4049+
                              age5059+age6069+age7079+age80pl)


# Inter-item correlations (figure 3)
pdf(file = paste0(plot.dir, "fig3a.pdf"),width=4,height=4.5)
corrplot(cor(bsas %>% 
               select(redistrb,morewelf,dolefidl,sochelp,welffeet),use="complete.obs"), 
         method="number",cl.cex=0.7,title="BSAS, 1994-6",cl.pos="n",mar=c(0,0,1,0))
dev.off()

pdf(file = paste0(plot.dir, "fig3b.pdf"),width=4,height=4.5)
corrplot(cor(yougov %>% 
               select(redistrb,morewelf,dolefidl,sochelp,welffeet),use="complete.obs"), 
         method="number",cl.cex=0.7,title="YouGov, 2020",cl.pos="n",mar=c(0,0,1,0))
dev.off()


####################################
## 3. SUPPORT BY GROUP (FIGURE 4)
####################################

confint <- function(x,var){
  myvals <- c( mean(x[which(var==1)]) - 1.96*(sd(x[which(var==1)])/sqrt(length(x[which(var==1)]))),
               mean(x[which(var==1)]) + 1.96*(sd(x[which(var==1)])/sqrt(length(x[which(var==1)]))))
  return(c(mean(x[which(var==1)]),myvals))
}


# Education
bsas$degree <- ifelse(bsas$hedqual=="degree",1,0)
bsas$secondary <- ifelse(bsas$hedqual=="cse or equivalent"|bsas$hedqual=="no qualifications"|bsas$hedqual=="o level/equivalent"|
                           bsas$hedqual=="a level/equivalent",1,0)

yougov$degree <- ifelse(yougov$hedqual=="university or cnaa first degree (e.g. ba, b.sc, b.ed)"|
                          yougov$hedqual=="university or cnaa higher degree (e.g. m.sc, ph.d)",1,0)
yougov$secondary <- ifelse(yougov$hedqual=="cse grade 1, gce o level, gcse, school certificate"|
                             yougov$hedqual=="cse grades 2-5"|yougov$hedqual=="gce a level or higher certificate"|
                             yougov$hedqual=="no formal qualifications",1,0)

# Income
bsas$lowincome <- ifelse(bsas$hhincome=="q ls thn 3999"|bsas$hhincome=="t 4000- 5999",1,0)
bsas$highincome <- ifelse(bsas$hhincome=="n 41000 or more"|bsas$hhincome=="p 38000- 40999"|bsas$hhincome=="g 35000- 37999"|
                            bsas$hhincome=="c 32000- 34999"|bsas$hhincome=="h 29000- 31999"|bsas$hhincome=="d 26000- 28999",1,0)

yougov$lowincome <- ifelse(yougov$hhincome=="under £5,000 per year"|yougov$hhincome=="£5,000 to £9,999 per year"|
                             yougov$hhincome=="£10,000 to £14,999 per year"|yougov$hhincome=="£15,000 to £19,999 per year",1,0)
yougov$highincome <- ifelse(yougov$hhincome=="£150,000 and over"|yougov$hhincome=="£100,000 to £149,999 per year"|
                              yougov$hhincome=="£70,000 to £99,999 per year"|yougov$hhincome=="£60,000 to £69,999 per year"|
                              yougov$hhincome=="£50,000 to £59,999 per year",1,0)


# age
bsas$rage[bsas$rage==99] <- NA
bsas$eighteen <- ifelse(bsas$rage<25,1,0)
bsas$sixtyfive <- ifelse(bsas$rage>64,1,0)

yougov$eighteen <- ifelse(yougov$rage<25,1,0)
yougov$sixtyfive <- ifelse(yougov$rage>64,1,0)


dataset <- as.data.frame(rbind(
  c("Low\nIncome",confint(bsas$combo,bsas$lowincome)),
  c("Low\nIncome",confint(yougov$combo,yougov$lowincome)),
  c("High\nIncome",confint(bsas$combo,bsas$highincome)),
  c("High\nIncome",confint(yougov$combo,yougov$highincome)),
  c("Secondary\nEducation",confint(bsas$combo,bsas$secondary)),
  c("Secondary\nEducation",confint(yougov$combo,yougov$secondary)),
  c("University\nDegree",confint(bsas$combo,bsas$degree)),
  c("University\nDegree",confint(yougov$combo,yougov$degree)),
  c("Aged\n18-24",confint(bsas$combo,bsas$eighteen)),
  c("Aged\n18-24",confint(yougov$combo,yougov$eighteen)),
  c("Aged\n65+",confint(bsas$combo,bsas$sixtyfive)),
  c("Aged\n65+",confint(yougov$combo,yougov$sixtyfive)),
  
  c("Low\nIncome",confint(bsas$deserve,bsas$lowincome)),
  c("Low\nIncome",confint(yougov$deserve,yougov$lowincome)),
  c("High\nIncome",confint(bsas$deserve,bsas$highincome)),
  c("High\nIncome",confint(yougov$deserve,yougov$highincome)),
  c("Secondary\nEducation",confint(bsas$deserve,bsas$secondary)),
  c("Secondary\nEducation",confint(yougov$deserve,yougov$secondary)),
  c("University\nDegree",confint(bsas$deserve,bsas$degree)),
  c("University\nDegree",confint(yougov$deserve,yougov$degree)),
  c("Aged\n18-24",confint(bsas$deserve,bsas$eighteen)),
  c("Aged\n18-24",confint(yougov$deserve,yougov$eighteen)),
  c("Aged\n65+",confint(bsas$deserve,bsas$sixtyfive)),
  c("Aged\n65+",confint(yougov$deserve,yougov$sixtyfive)),
  
  c("Low\nIncome",confint(bsas$redist,bsas$lowincome)),
  c("Low\nIncome",confint(yougov$redist,yougov$lowincome)),
  c("High\nIncome",confint(bsas$redist,bsas$highincome)),
  c("High\nIncome",confint(yougov$redist,yougov$highincome)),
  c("Secondary\nEducation",confint(bsas$redist,bsas$secondary)),
  c("Secondary\nEducation",confint(yougov$redist,yougov$secondary)),
  c("University\nDegree",confint(bsas$redist,bsas$degree)),
  c("University\nDegree",confint(yougov$redist,yougov$degree)),
  c("Aged\n18-24",confint(bsas$redist,bsas$eighteen)),
  c("Aged\n18-24",confint(yougov$redist,yougov$eighteen)),
  c("Aged\n65+",confint(bsas$redist,bsas$sixtyfive)),
  c("Aged\n65+",confint(yougov$redist,yougov$sixtyfive))
))


colnames(dataset)=c("variable","mean","lower","upper")
dataset$mean <- as.numeric(as.character(dataset$mean)) 
dataset$lower <- as.numeric(as.character(dataset$lower)) 
dataset$upper <- as.numeric(as.character(dataset$upper)) 
dataset$var <- rep(c(rep(6,2),rep(5,2),rep(4,2),rep(3,2),rep(2,2),rep(1,2)),3)
dataset$varnames <- rev(dataset$variable)
dataset$Source <- rep(c("BSAS 1994-6","YouGov 2020"),18)
dataset$question <- c(rep("Combined",12),rep("Deservingness",12),rep("Redistribution",12))

pdf(file = paste0(plot.dir, "fig4.pdf"), width=7.5,height=3.25)
ggplot(data = dataset, aes(x = factor(var), y = mean, ymin = lower, ymax = upper,colour = Source, shape=Source)) +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0, size=0.5) +
  geom_point(position = position_dodge(width = 0.5),size=2.7) +
  coord_flip() +
  facet_wrap(~ question) +
  theme_bw() +
  scale_x_discrete(name="",labels=unique(dataset$varnames)) +
  scale_y_continuous(name="Mean Support",limits=c(2.75,3.7)) +
  theme(legend.position = "bottom", legend.title = element_blank(), panel.grid = element_blank(), 
        panel.border = element_rect(color="gray")) 
dev.off()


######################
## 4. MRP ESTIMATION
######################

# Multilevel models for BSAS data [note: took about 4 hours per model for us on a standard laptop]
mod.bsascombo <- brm(combo ~ (1|female) + (1|age_cat) + (1|lad_past_nm_96) + (1|rgn19nm_no_1) +
                       la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + la_gva + la_houseprice +
                       la_socbensrd  + la_popdensity + la_degree,
                     data=bsas, family=gaussian(),
                     control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.bsascombo)


mod.bsasredist <- brm(redist ~ (1|female) + (1|age_cat) + (1|lad_past_nm_96) + (1|rgn19nm_no_1) +
                        la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + la_gva + la_houseprice +
                        la_socbensrd  + la_popdensity + la_degree,
                      data=bsas, family=gaussian(),
                      control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.bsasredist)


mod.bsasdeserve <- brm(deserve ~ (1|female) + (1|age_cat) + (1|lad_past_nm_96) + (1|rgn19nm_no_1) +
                         la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + la_gva + la_houseprice +
                         la_socbensrd  + la_popdensity + la_degree,
                       data=bsas, family=gaussian(),
                       control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.bsasdeserve)


# Multilevel models for YouGov data   [note: took about 2.5 hours per model for us on a standard laptop]
mod.yougovcombo <- brm(combo ~ (1|female) + (1|age_cat) + (1|lad19nm) + (1|rgn19nm) +
                         la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + 
                         la_gva + la_houseprice + la_socbensrd  + la_popdensity + 
                         la_degree,
                       data=yougov, family=gaussian(),
                       control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.yougovcombo)


mod.yougovredist <- brm(redist ~ (1|female) + (1|age_cat) + (1|lad19nm) + (1|rgn19nm) +
                          la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + 
                          la_gva + la_houseprice + la_socbensrd  + la_popdensity + 
                          la_degree,
                        data=yougov, family=gaussian(),
                        control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.yougovredist)


mod.yougovdeserve <- brm(deserve ~ (1|female) + (1|age_cat) + (1|lad19nm) + (1|rgn19nm) +
                           la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + 
                           la_gva + la_houseprice + la_socbensrd  + la_popdensity + 
                           la_degree,
                         data=yougov, family=gaussian(),
                         control=list(adapt_delta=.99,max_treedepth = 15))
summary(mod.yougovdeserve)

save.image(paste0(output.dir,"brm_models.RData"))


## Post-stratify, BSAS data ##

# Combined model
load(paste0(data.dir,"post96.Rda")) #post-stratification data for local authorities, 1996
post96$prediction <- as.data.frame(mod.bsascombo %>% 
                                     predict(newdata=post96,allow_new_levels=TRUE))$Estimate
post96$weight.pred <- post96$prediction*post96$value 

pred_bsas_combo_repl <- post96 %>% group_by(lad_past_nm_96) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.bsascombo %>% posterior_epred(newdata=post96,
                                                                  allow_new_levels=TRUE)))*post96$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post96,draws_pstrat) %>% 
  group_by(lad_past_nm_96) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_bsas_combo_repl$lowerq <- draws_pstrat$lowerq
pred_bsas_combo_repl$upperq <- draws_pstrat$upperq

save(pred_bsas_combo_repl,file=paste0(output.dir,"pred_bsas_combo_repl.Rda"))


# Redistribution model 
load(paste0(data.dir,"post96.Rda")) 
post96$prediction <- as.data.frame(mod.bsasredist %>% 
                                     predict(newdata=post96,allow_new_levels=TRUE))$Estimate
post96$weight.pred <- post96$prediction*post96$value 

pred_bsas_redist_repl <- post96 %>% group_by(lad_past_nm_96) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.bsasredist %>% posterior_epred(newdata=post96,
                                                                   allow_new_levels=TRUE)))*post96$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post96,draws_pstrat) %>% 
  group_by(lad_past_nm_96) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_bsas_redist_repl$lowerq <- draws_pstrat$lowerq
pred_bsas_redist_repl$upperq <- draws_pstrat$upperq

save(pred_bsas_redist_repl,file=paste0(output.dir,"pred_bsas_redist_repl.Rda"))



# Deservingness model 
load(paste0(data.dir,"post96.Rda")) 
post96$prediction <- as.data.frame(mod.bsasdeserve %>% 
                                     predict(newdata=post96,allow_new_levels=TRUE))$Estimate
post96$weight.pred <- post96$prediction*post96$value 

pred_bsas_deserve_repl <- post96 %>% group_by(lad_past_nm_96) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.bsasdeserve %>% posterior_epred(newdata=post96,
                                                                    allow_new_levels=TRUE)))*post96$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post96,draws_pstrat) %>% 
  group_by(lad_past_nm_96) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_bsas_deserve_repl$lowerq <- draws_pstrat$lowerq
pred_bsas_deserve_repl$upperq <- draws_pstrat$upperq

save(pred_bsas_deserve_repl,file=paste0(output.dir,"pred_bsas_deserve_repl.Rda"))



## Post-stratify, YouGov data ##

# combined model 
load(paste0(data.dir,"post20.Rda")) #post-stratification data for local authorities, 2020
post20$prediction <- as.data.frame(mod.yougovcombo %>% 
                                     predict(newdata=post20,allow_new_levels=TRUE))$Estimate
post20$weight.pred <- post20$prediction*post20$value 

pred_yougov_combo_repl <- post20 %>% group_by(lad19nm) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.yougovcombo %>% posterior_epred(newdata=post20,
                                                                    allow_new_levels=TRUE)))*post20$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post20,draws_pstrat) %>% 
  group_by(lad19nm) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_yougov_combo_repl$lowerq <- draws_pstrat$lowerq
pred_yougov_combo_repl$upperq <- draws_pstrat$upperq

save(pred_yougov_combo_repl,file=paste0(output.dir,"pred_yougov_combo_repl.Rda"))


# redistribution model 
load(paste0(data.dir,"post20.Rda"))
post20$prediction <- as.data.frame(mod.yougovredist %>% 
                                     predict(newdata=post20,allow_new_levels=TRUE))$Estimate
post20$weight.pred <- post20$prediction*post20$value 

pred_yougov_redist_repl <- post20 %>% group_by(lad19nm) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.yougovredist %>% posterior_epred(newdata=post20,
                                                                     allow_new_levels=TRUE)))*post20$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post20,draws_pstrat) %>% 
  group_by(lad19nm) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_yougov_redist_repl$lowerq <- draws_pstrat$lowerq
pred_yougov_redist_repl$upperq <- draws_pstrat$upperq

save(pred_yougov_redist_repl,file=paste0(output.dir,"pred_yougov_redist_repl.Rda"))


# deservingness model 
load(paste0(data.dir,"post20.Rda"))
post20$prediction <- as.data.frame(mod.yougovdeserve %>% 
                                     predict(newdata=post20,allow_new_levels=TRUE))$Estimate
post20$weight.pred <- post20$prediction*post20$value 

pred_yougov_deserve_repl <- post20 %>% group_by(lad19nm) %>% summarise(final.est=sum(weight.pred))

# uncertainty
draws_pstrat <- t(as.data.frame(mod.yougovdeserve %>% posterior_epred(newdata=post20,
                                                                      allow_new_levels=TRUE)))*post20$value
# sum weighted predictions within each LA
draws_pstrat <- cbind(post20,draws_pstrat) %>% 
  group_by(lad19nm) %>% 
  summarise(across(c(25:4024),sum)) 

# 95% credible interval
draws_pstrat <- draws_pstrat %>% mutate(
  lowerq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.025, na.rm = TRUE),
  upperq = rowQuantiles(as.matrix(draws_pstrat[,2:4001]), 
                        probs=0.975, na.rm = TRUE))
pred_yougov_deserve_repl$lowerq <- draws_pstrat$lowerq
pred_yougov_deserve_repl$upperq <- draws_pstrat$upperq

save(pred_yougov_deserve_repl,file=paste0(output.dir,"pred_yougov_deserve_repl.Rda"))


########################################
# Figure 5: Distribution of MRP results
########################################

### BSAS MRP results
pred_bsas_comb <- as.data.table(pred_bsas_combo_repl) 
pred_bsas_deserve <- as.data.table(pred_bsas_deserve_repl) 
pred_bsas_redist <- as.data.table(pred_bsas_redist_repl) 

names(pred_bsas_comb)[2:4] <- c("final.est.comb","lowerq.comb","upperq.comb")
names(pred_bsas_deserve)[2:4] <- c("final.est.deserving","lowerq.deserving","upperq.deserving")
names(pred_bsas_redist)[2:4] <- c("final.est.redist","lowerq.redist","upperq.redist")

# Combine
pred_bsas_est <- merge(pred_bsas_comb, pred_bsas_deserve, by="lad_past_nm_96")
pred_bsas_est <- merge(pred_bsas_est, pred_bsas_redist, by="lad_past_nm_96")

pred_bsas_est[ , type := "bsas 1995" ]
pred_bsas_est[ , year := 1996 ]
setnames(pred_bsas_est, "lad_past_nm_96", "local.authority.name")


### YouGov MRP results
pred_yougov_comb <- as.data.table(pred_yougov_combo_repl) 
pred_yougov_deserve <- as.data.table(pred_yougov_deserve_repl) 
pred_yougov_redist <- as.data.table(pred_yougov_redist_repl) 

names(pred_yougov_comb)[2:4] <- c("final.est.comb","lowerq.comb","upperq.comb")
names(pred_yougov_deserve)[2:4] <- c("final.est.deserving","lowerq.deserving","upperq.deserving")
names(pred_yougov_redist)[2:4] <- c("final.est.redist","lowerq.redist","upperq.redist")

# Combine
pred_yougov_est <- merge(pred_yougov_comb, pred_yougov_deserve, by="lad19nm")
pred_yougov_est <- merge(pred_yougov_est, pred_yougov_redist, by="lad19nm")

pred_yougov_est[ , type := "yougov 2020" ]
pred_yougov_est[ , year := 2020 ]
setnames(pred_yougov_est, "lad19nm", "local.authority.name")

### Combine BSAS and YouGov
pred_est <- rbind(pred_bsas_est, pred_yougov_est)

setnames(pred_est, "final.est.comb", "lad_estimate_comb")
setnames(pred_est, "final.est.deserving", "lad_estimate_deserving")
setnames(pred_est, "final.est.redist", "lad_estimate_redist")


### Normalize 
pred_est[ , lad_estimate_comb_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lad_estimate_comb" ]
pred_est[ , lowerq.comb_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lowerq.comb" ]
pred_est[ , upperq.comb_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="upperq.comb" ]

pred_est[ , lad_estimate_deserving_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lad_estimate_deserving" ]
pred_est[ , lowerq.deserving_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lowerq.deserving" ]
pred_est[ , upperq.deserving_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="upperq.deserving" ]

pred_est[ , lad_estimate_redist_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lad_estimate_redist" ]
pred_est[ , lowerq.redist_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="lowerq.redist" ]
pred_est[ , upperq.redist_norm := lapply( .SD, function (x) BBmisc::normalize(x, method = "range", range = c(0, 1))), .SDcols="upperq.redist" ]

### Density
pred_est_long <- as.data.table(melt(pred_est[,.(local.authority.name,type,lad_estimate_comb_norm,lad_estimate_deserving_norm,lad_estimate_redist_norm)],
                                    id.vars=c("local.authority.name","type")))

pred_est_long[ variable == "lad_estimate_comb_norm" , variable2 := "Combined social policy support" ]
pred_est_long[ variable == "lad_estimate_redist_norm" , variable2 := "Redistribution" ]
pred_est_long[ variable == "lad_estimate_deserving_norm" , variable2 := "Deservingness" ]

pred_est_long[ type == "bsas 1995", svy_year := "1994-6" ]
pred_est_long[ type == "yougov 2020", svy_year := "2020" ]

# Average estimate by year / type
pred_est_long[ , mean_lad_estimate_norm_type := lapply( .SD, function (x) mean(x, na.rm=T) ), by=c("svy_year","variable"), .SDcols="value" ]


pdf(paste0(plot.dir, "fig5.pdf"), width=7.5, height=3.75)
(plot <- (ggplot(pred_est_long, aes(x=value, group=svy_year, fill=svy_year))
          + geom_density(alpha=0.5)
          + geom_vline(aes(xintercept = mean_lad_estimate_norm_type, linetype=svy_year), linewidth=0.9, colour="gray30", show.legend = F)
          + theme_bw()
          + ylab("Density")
          + xlab("LAD social policy support")
          + facet_wrap(~ variable2)
          + theme(legend.position="bottom", panel.grid = element_blank(), 
                  panel.border = element_rect(color="gray"))
          + scale_fill_brewer("Survey year", palette="Set1")
          + scale_linetype("Survey year")
))
dev.off()






######################
### Figure 6: Maps ###
######################

### Load shapefiles

### 2020
lad_shp <- sf::st_read(paste0(data.dir,"shapefiles/LAD_2020"))
lad_shp$lad20nm <- tolower(lad_shp$lad20nm)

lad_shp <- merge(lad_shp, pred_est, by.x=c("lad20nm"), by.y=c("local.authority.name"), all.x=T)

### Remove NAs
lad_shp <- lad_shp[ !is.na(lad_shp$lad_estimate_comb), ]


### 1998 

### England
shp_98_ENG <- sf::st_read(paste0(data.dir,"shapefiles/LAD_ENG_1998"))
shp_98_ENG$id <- tolower(shp_98_ENG$LAD98NM)

### Scotland
shp_98_SCOT <- sf::st_read(paste0(data.dir,"shapefiles/Councils_SCOT_1998"))
shp_98_SCOT$id <- tolower(shp_98_SCOT$CA98NM)

pred_est_1995 <- pred_est[ type == "bsas 1995" ]

### Rename
pred_est_1995[ , id := local.authority.name]
pred_est_1995[ local.authority.name == "bristol" , id := "bristol, city of" ]
pred_est_1995[ local.authority.name == "ellesmere port and neston" , id := "ellesmere port & neston" ]
pred_est_1995[ local.authority.name == "herefordshire" , id := "herefordshire, county of" ]
pred_est_1995[ local.authority.name == "kings lynn and west norfolk" , id := "king's lynn and west norfolk" ]
pred_est_1995[ local.authority.name == "kingston upon hull" , id := "kingston upon hull, city of" ]
pred_est_1995[ local.authority.name == "southend" , id := "southend-on-sea" ]
pred_est_1995[ local.authority.name == "st edmundsbury" , id := "st. edmundsbury" ]
pred_est_1995[ local.authority.name == "st helens" , id := "st. helens" ]
pred_est_1995[ local.authority.name == "the vale of glamorgan" , id := "vale of glamorgan, the" ]
pred_est_1995[ local.authority.name == "edinburgh" , id := "edinburgh, city of" ]

# Merge
shp_98_ENG <- merge(shp_98_ENG, pred_est_1995, by=c("id"), all.x=T)
shp_98_SCOT <- merge(shp_98_SCOT, pred_est_1995, by=c("id"), all.x=T)

### Plots
(plot_yougov1x <- ggplot()
 + geom_sf(data = lad_shp, mapping=aes(fill=lad_estimate_comb_norm), colour="NA")  
 + coord_sf()
 + theme_minimal()
 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
         axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
         axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
         legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
 +   scale_fill_viridis(name="Combined welfare support", limits = c(0,1), breaks=seq(0,1,0.25),
                        guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                             label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
 + ggtitle("2020")
)


(plot_bsas1x <- ggplot()
  + geom_sf(data = shp_98_ENG, mapping=aes(fill=lad_estimate_comb_norm), colour="NA")  
  + geom_sf(data = shp_98_SCOT, mapping=aes(fill=lad_estimate_comb_norm), colour="NA")  
  + coord_sf()
  + theme_minimal()
  + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
          axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
          legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
  +   scale_fill_viridis(name="Combined welfare support", limits = c(0,1), breaks=seq(0,1,0.25),
                         guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                              label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
  + ggtitle("1994-6")
)



### Combine maps
pdf(paste0(plot.dir, "fig6.pdf"), width=7, height=7) 
ggpubr::ggarrange(plot_bsas1x, plot_yougov1x, nrow=1, common.legend = TRUE, legend="bottom")
dev.off()





############################################
### Figure 7: Local authority covariates ###
############################################

### Load original covariate tables
covariates_1996 <- as.data.table(read.csv(paste0(data.dir,"covars1996.csv"))) 
covariates_2019 <- as.data.table(read.csv(paste0(data.dir,"covars2020.csv"))) 

### Lookup table 
lookup_table <- as.data.table(read.csv(paste0(data.dir,"lookup_9619.csv")))
lookup_table[ , c("X") := NULL ]

setnames(lookup_table, "lad19nm", "lad19nm_target")

covariates_1996_v1 <- merge(covariates_1996, lookup_table, by="lad96nm", all.x=T)

### Rename
covariates_1996_v1[ lad96nm == "blackburn with darwen" , lad19nm_target := "blackburn with darwen" ]
covariates_1996_v1[ lad96nm == "telford and wrekin" , lad19nm_target := "telford and wrekin" ]

setnames(covariates_1996_v1, "la_socbensrd", "la_socbensrd_log")
setnames(covariates_1996_v1, "la_degree", "la_prop_degree")


### Use LAD target from lookup table to take averages
covariates_1996_avg2019 <- covariates_1996_v1[ , lapply( .SD, function (x) mean(x, na.rm=F) ), by=c("lad19nm_target"), 
                                               .SDcols=c("la_claimants", "la_houseprice", "la_pc1824", "la_pc65plus", "la_gva", 
                                                         "la_hhinc", "la_socbensrd_log", "la_popdensity", "la_prop_degree") ]

### Combine
covariates_1996_avg2019[ , year := 1996 ]  

covariates_2019[ , year := 2020 ]  
covariates_2019[ , la_socbensrd_log := log(la_socbensrd)]

setnames(covariates_1996_avg2019, "lad19nm_target", "lad19nm")

covariates_comb <- rbind(covariates_1996_avg2019, covariates_2019, fill=T)

### Inflation data
inflation <- as.data.table(read.csv(paste0(data.dir,"inflation.csv"), header=T))

# Reflate to 2020=1
inflation[ , inflation_index := inflation_index / 108.7 ]

covariates_comb <- merge(covariates_comb, inflation, by="year", all.x=T)

covariates_comb[ , la_houseprice_cpi := la_houseprice / inflation_index ]
covariates_comb[ , la_hhinc_cpi := la_hhinc / inflation_index ]

covariates_comb[ , la_popdensity_log := log(la_popdensity)]
covariates_comb[ , la_houseprice_cpi_log := log(la_houseprice_cpi) ]
covariates_comb[ , la_hhinc_cpi_log := log(la_hhinc_cpi) ]

### Melt
covariates_comb_l <- as.data.table(melt(covariates_comb, id.vars=c("lad19nm","year"))) 
covariates_comb_l[ , year := factor(year)]
covariates_comb_l[ , variable := gsub("la_","",variable)]

covariates_comb_l[ , value := as.numeric(value) ]

vars <- c("claimants","pc1824","pc65plus","gva","prop_degree","hhinc_cpi_log","socbensrd_log","houseprice_cpi_log","popdensity_log")

covariates_comb_l[ variable == "claimants" , type2 := "Unemployment rate (%)"]
covariates_comb_l[ variable == "prop_degree" , type2 := "Share people with degree (%)"]
covariates_comb_l[ variable == "hhinc_cpi_log" , type2 := "Household income (log)"]
covariates_comb_l[ variable == "pc1824" , type2 := "Share 18-24 year-olds (%)"]
covariates_comb_l[ variable == "houseprice_cpi_log" , type2 := "Mean house price (log)"]

covariates_comb_l[ variable == "prop_degree" , value := value * 100 ]
covariates_comb_l[ variable == "pc1824" , value := value * 100 ]

covariates_comb_l[ year == "1996" , svy_year := "1994-6"]
covariates_comb_l[ year == "2020" , svy_year := "2020"]

covariates_comb_l[ , type3 := factor(type2, levels=c("Household income (log)","Unemployment rate (%)","Share 18-24 year-olds (%)","Share people with degree (%)","Mean house price (log)"))]

pdf(paste0(plot.dir, "fig7.pdf"), width=7, height=5.5)
(plot <- (ggplot(covariates_comb_l[ variable %in% c("claimants","prop_degree","hhinc_cpi_log","pc1824","houseprice_cpi_log") ], 
                 aes(x=value, group=svy_year, fill=svy_year))
          + geom_density(alpha=0.5)
          + theme_bw()
          + xlab("Value")
          + ylab("Density")
          + facet_wrap( ~ type3, scales="free")
          + theme(legend.position=c(0.8,0.25), panel.grid.minor=element_blank(), 
                  panel.border = element_rect(color="gray"))
          + scale_fill_brewer("Year", palette="Set1")
          + scale_linetype("Year")
))
dev.off()



##############################################################
### Figure 8: plot for all three MRP preference dimensions ###
##############################################################

### Merge with predicted MRP estimates
pred_est_cov1 <- merge(pred_est, covariates_comb, by.x=c("local.authority.name","year"), by.y=c("lad19nm","year"))

pred_est_cov1[ type == "bsas 1995" , type2 := "1994-6"]
pred_est_cov1[ type == "yougov 2020" , type2 := "2020"]

pred_est_cov1[ , la_pc1824_pct := la_pc1824 * 100 ]
pred_est_cov1[ , la_prop_degree_pct := la_prop_degree * 100 ]

pred_est_cov1_l <- as.data.table(melt(pred_est_cov1[ ,c("type2","lad_estimate_comb_norm","lad_estimate_deserving_norm","lad_estimate_redist_norm",
                                                        "la_houseprice_cpi_log","la_popdensity", "la_pc1824_pct", "la_prop_degree_pct", "la_claimants", "la_hhinc_cpi_log"), with=F ],
                                      id.vars=c("type2","la_houseprice_cpi_log","la_popdensity", "la_pc1824_pct", "la_prop_degree_pct", "la_claimants", "la_hhinc_cpi_log")))

pred_est_cov1_l <- pred_est_cov1_l[ !is.na(value) ]

pred_est_cov1_l[ variable == "lad_estimate_comb_norm" , variable2 := "Combined"]
pred_est_cov1_l[ variable == "lad_estimate_deserving_norm" , variable2 := "Deservingness"]
pred_est_cov1_l[ variable == "lad_estimate_redist_norm" , variable2 := "Redistribution"]

### Combined
median_soc_pref <- pred_est_cov1_l[ , lapply( .SD, function (x) median(x, na.rm=T) ), by=c("type2"), .SDcols=c("value") ]
median_soc_pref2 <- pred_est_cov1_l[ variable2 == "Combined", lapply( .SD, function (x) median(x, na.rm=T) ), by=c("type2"), .SDcols=c("value") ]

(plot1 <- (ggplot(pred_est_cov1_l[ !is.na(variable2) ], aes(y=value, x=la_hhinc_cpi_log, colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Log gross household income")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("Household income")
))

(plot2 <- (ggplot(pred_est_cov1_l[ ], aes(y=value, x=log(la_claimants), colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Log unemployment rate")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("Unemployment")
))

(plot3 <- (ggplot(pred_est_cov1_l[ ], aes(y=value, x=la_prop_degree_pct, colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Share with degree")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("Education")
))

(plot4 <- (ggplot(pred_est_cov1_l[ ], aes(y=value, x=la_pc1824_pct, colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Share 18-24 years old")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("Age profile")
))

(plot5 <- (ggplot(pred_est_cov1_l[ ], aes(y=value, x=log(la_popdensity), colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Log population density")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("Population density")
))

(plot6 <- (ggplot(pred_est_cov1_l[ ], aes(y=value, x=la_houseprice_cpi_log, colour=variable2, shape=variable2, fill=variable2))
           + geom_point(size=0.8, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + geom_hline(data=median_soc_pref, aes(yintercept = value), size=0.7, colour="gray40", linetype="dashed")
           + ylab("Social policy support")
           + xlab("Log mean house prices")
           + theme_bw()
           + facet_wrap(~ type2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Preference\ndimension", palette="Set1")
           + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
           + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
           + ggtitle("House prices")
))

pdf(paste0(plot.dir, "fig8.pdf"), width=9, height=10)  
ggpubr::ggarrange(plot1,plot2,plot3,plot4,plot5,plot6, 
                  ncol=2, nrow=3, common.legend = TRUE, legend="bottom")
dev.off()



#############################
### Figure 9: Brexit vote ###
#############################

### Load data
brexit <- as.data.table(read.csv(paste0(data.dir,"brexit_vote_LAD.csv")))

names(brexit) <- c("lad19nm", "pct_remain", "pct_leave")

### Use lookup table to create 1996 LADs
brexit_v1 <- merge(brexit, lookup_table, by.x="lad19nm", by.y="lad19nm_target", all.x=T)

brexit_2020_pred <- merge(brexit, pred_est[ type == "yougov 2020" ,.(local.authority.name,lad_estimate_comb_norm,lad_estimate_deserving_norm,lad_estimate_redist_norm) ], 
                          by.x="lad19nm", by.y="local.authority.name")

brexit_1996_pred <- merge(brexit_v1, pred_est[ type == "bsas 1995" ,.(local.authority.name,lad_estimate_comb_norm,lad_estimate_deserving_norm,lad_estimate_redist_norm) ], 
                          by.x="lad96nm", by.y="local.authority.name", all.x=T)


### Take average to map 1996 LADs to 2020 LADs (account for merges)
brexit_1996_pred_avg <- brexit_1996_pred[ , lapply( .SD, function (x) mean(x, na.rm=F) ), by=c("lad19nm"), 
                                          .SDcols=c("lad_estimate_comb_norm","lad_estimate_deserving_norm","lad_estimate_redist_norm","pct_remain","pct_leave") ]

brexit_1996_pred_avg[ , type := "1994-6" ]
brexit_2020_pred[ , type := "2020" ]

brexit_96_20 <- rbind(brexit_1996_pred_avg, brexit_2020_pred)

pdf(paste0(plot.dir, "fig9.pdf"),  width=6.5, height=3)  
(plot <- (ggplot(brexit_96_20[ ], aes(x=pct_leave, group=1))
          + geom_point(aes(y=lad_estimate_comb_norm, shape="Combined", colour="Combined"), size=0.8, alpha=0.4)
          + geom_point(aes(y=lad_estimate_deserving_norm, shape="Deservingness", colour="Deservingness"), size=0.8, alpha=0.4)
          + geom_point(aes(y=lad_estimate_redist_norm, shape="Redistribution", colour="Redistribution"), size=0.8, alpha=0.4)
          + stat_smooth(aes(y=lad_estimate_comb_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_deserving_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_redist_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_comb_norm, colour="Combined"), size=1, se = F)
          + stat_smooth(aes(y=lad_estimate_deserving_norm, colour="Deservingness"), size=1, se = F)
          + stat_smooth(aes(y=lad_estimate_redist_norm, colour="Redistribution"), size=1, se = F)
          + ylab("Social policy support")
          + xlab("Brexit share leave")
          + theme_bw()
          + facet_wrap(~ type)
          + theme(legend.position="right", panel.grid.minor=element_blank(), 
                  panel.border = element_rect(color="gray"))
          + scale_x_continuous(limits=c(20,76))
          + scale_color_brewer(name="Preference\ndimension", palette="Set1")
          + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
          + scale_shape_manual(name="Preference\ndimension", values=c(15:17)) 
))
dev.off()



####################################
### Figure 10: Changes over time ###
####################################

setkey(pred_est_cov1, local.authority.name, year)

pred_est_cov1[ , lad_estimate_norm_95 := shift(.SD, type='lag', n=1), by = "local.authority.name", .SDcols="lad_estimate_comb_norm"]
pred_est_cov1[ , lad_estimate_redist_norm_95 := shift(.SD, type='lag', n=1), by = "local.authority.name", .SDcols="lad_estimate_redist_norm"]
pred_est_cov1[ , lad_estimate_deserving_norm_95 := shift(.SD, type='lag', n=1), by = "local.authority.name", .SDcols="lad_estimate_deserving_norm"]

pred_est_cov1[ , la_prop_degree_95 := shift(.SD, type='lag', n=1), by = "local.authority.name", .SDcols="la_prop_degree"]
pred_est_cov1[ , la_pc1824_95 := shift(.SD, type='lag', n=1), by = "local.authority.name", .SDcols="la_pc1824"]

pred_est_cov1[ , change_lad_estimate_norm := lad_estimate_comb_norm - lad_estimate_norm_95 ]
pred_est_cov1[ , change_lad_estimate_redist_norm := lad_estimate_redist_norm - lad_estimate_redist_norm_95 ]
pred_est_cov1[ , change_lad_estimate_deserving := lad_estimate_deserving_norm - lad_estimate_deserving_norm_95 ]

pred_est_cov1[ , change_la_prop_degree := la_prop_degree - la_prop_degree_95 ]
pred_est_cov1[ , change_la_pc1824 := la_pc1824 - la_pc1824_95 ]

pred_est_cov_melt <- as.data.table(melt(pred_est_cov1[ year == 2020 ,.(change_lad_estimate_norm,change_lad_estimate_redist_norm,change_lad_estimate_deserving,
                                                                       change_la_prop_degree,change_la_pc1824) ],
                                        id.vars=c("change_la_prop_degree","change_la_pc1824")))

pred_est_cov_melt[ variable == "change_lad_estimate_norm" , variable2 := "Combined"]
pred_est_cov_melt[ variable == "change_lad_estimate_deserving" , variable2 := "Deservingness"]
pred_est_cov_melt[ variable == "change_lad_estimate_redist_norm" , variable2 := "Redistribution"]

pdf(paste0(plot.dir, "fig10.pdf"),  width=5.25, height=3.75) 
(p <- (ggplot(pred_est_cov_melt, aes(y=value, x=change_la_prop_degree, colour=variable2, fill=variable2, shape=variable2))
       + geom_point(size=0.9, alpha=0.5)
       + stat_smooth(size=1.5, se=F, colour="white")
       + stat_smooth(size=1.1, se=F)
       + ylab("Change in mean social policy support")
       + xlab("Change in share with degree")
       + theme_bw()
       + theme(legend.position="right", panel.grid.minor=element_blank(), 
               panel.border = element_rect(color="gray"))
       + scale_color_brewer(name="Preference\ndimension", palette="Set1")
       + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
       + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
))
dev.off()





######################################################
### Figure 11: MRP results and local election outcomes
######################################################

load(paste0(data.dir,"loc_elec.Rda"))

### Data for 1994-1996
local_elec_95 <- unique(loc_elec[ year %in% c(1994,1995,1996) ])

### Average by lad19nm for LADs that have elections on both years
local_elec_95_avg <- local_elec_95[ , lapply( .SD, function (x) mean(x, na.rm=F) ), by=c("lad19nm","party"), .SDcols="percent_share" ]

local_elec_95_d1 <- local_elec_95_avg[ party == "Con" ,.(lad19nm,percent_share)]
setnames(local_elec_95_d1, "percent_share","vote_share_Con")

local_elec_95_d2 <- local_elec_95_avg[ party == "Lab" ,.(lad19nm,percent_share)]
setnames(local_elec_95_d2, "percent_share","vote_share_Lab")

local_elec_95_d3 <- local_elec_95_avg[ party == "LD" ,.(lad19nm,percent_share)]
setnames(local_elec_95_d3, "percent_share","vote_share_LD")

local_elec_95_d4 <- local_elec_95_avg[ party == "SNP" ,.(lad19nm,percent_share)]
setnames(local_elec_95_d4, "percent_share","vote_share_SNP")

local_elec_95_d5 <- local_elec_95_avg[ party == "Green" ,.(lad19nm,percent_share)]
setnames(local_elec_95_d5, "percent_share","vote_share_Green")

# Merge
local_elec_95_comb <- merge(local_elec_95_d1, local_elec_95_d2, by=c("lad19nm"), all=T)
local_elec_95_comb <- merge(local_elec_95_comb, local_elec_95_d3, by=c("lad19nm"), all=T)
local_elec_95_comb <- merge(local_elec_95_comb, local_elec_95_d4, by=c("lad19nm"), all=T)
local_elec_95_comb <- merge(local_elec_95_comb, local_elec_95_d5, by=c("lad19nm"), all=T)

local_elec_95_fin <- local_elec_95_comb
local_elec_95_fin[ , year := "1994-96" ]


### Data for 2018
local_elec_18 <- unique(loc_elec[ year %in% c(2018) ])

### Average by lad19nm
local_elec_18_d1 <- local_elec_18[ party == "Con" ,.(lad19nm,percent_share)]
setnames(local_elec_18_d1, "percent_share","vote_share_Con")

local_elec_18_d2 <- local_elec_18[ party == "Lab" ,.(lad19nm,percent_share)]
setnames(local_elec_18_d2, "percent_share","vote_share_Lab")

local_elec_18_d3 <- local_elec_18[ party == "LD" ,.(lad19nm,percent_share)]
setnames(local_elec_18_d3, "percent_share","vote_share_LD")

local_elec_18_d4 <- local_elec_18[ party == "SNP" ,.(lad19nm,percent_share)]
setnames(local_elec_18_d4, "percent_share","vote_share_SNP")

local_elec_18_d5 <- local_elec_18[ party == "Green" ,.(lad19nm,percent_share)]
setnames(local_elec_18_d5, "percent_share","vote_share_Green")

# Merge
local_elec_18_comb <- merge(local_elec_18_d1, local_elec_18_d2, by=c("lad19nm"), all=T)
local_elec_18_comb <- merge(local_elec_18_comb, local_elec_18_d3, by=c("lad19nm"), all=T)
local_elec_18_comb <- merge(local_elec_18_comb, local_elec_18_d4, by=c("lad19nm"), all=T)
local_elec_18_comb <- merge(local_elec_18_comb, local_elec_18_d5, by=c("lad19nm"), all=T)

local_elec_18_fin <- local_elec_18_comb
local_elec_18_fin[ , year := "2018" ]

### Load data for local election 2019
load(paste0(data.dir,"loc_elec_19.Rda"))

loc_elec_19[ , council_name := tolower(council_name) ]
setnames(loc_elec_19, "council_name", "lad19nm")

### Fix misspellings
loc_elec_19[, lad19nm := gsub("&","and",lad19nm)]

loc_elec_19[ lad19nm == "barrow in furness" , lad19nm := "barrow-in-furness" ]
loc_elec_19[ lad19nm == "herefordshire" , lad19nm := "herefordshire, county of" ]
loc_elec_19[ lad19nm == "kings lynn and west norfolk" , lad19nm := "king's lynn and west norfolk" ]
loc_elec_19[ lad19nm == "kingston upon hull" , lad19nm := "kingston upon hull, city of" ]
loc_elec_19[ lad19nm == "southend on sea" , lad19nm := "southend-on-sea" ]
loc_elec_19[ lad19nm == "st helens" , lad19nm := "st. helens" ]
loc_elec_19[ lad19nm == "stoke on trent" , lad19nm := "stoke-on-trent" ]
loc_elec_19[ lad19nm == "stratford on avon" , lad19nm := "stratford-on-avon" ]

### Merge / check councils names
loc_elec_19_v2 <- merge(loc_elec_19, unique(loc_elec[ ,.(lad19nm,lad19cd) ]), by="lad19nm", all.x=T) 

setnames(loc_elec_19_v2, "vote_share_CON","vote_share_Con")
setnames(loc_elec_19_v2, "vote_share_LAB","vote_share_Lab")

loc_elec_19_v2[ , year := 2019 ]
loc_elec_19_v2[ , lad19cd := NULL ]

### Combine 2018/2019
# add LAD that did not have elections in 2019
local_elec_18_19 <- rbind(loc_elec_19_v2,local_elec_18_fin[ ! local_elec_18_fin$lad19nm %in% loc_elec_19_v2$lad19nm ], fill=T)

### Combine with 1990s
local_elec_fin <- rbind(local_elec_95_fin, local_elec_18_19, fill=T)

local_elec_fin[ year == "1994-96" , type := "bsas 1994-6"]
local_elec_fin[ year %in% c("2018","2019") , type := "yougov 2020"]

setkey(local_elec_fin, lad19nm, year)

local_elec_fin[ year == "1994-96" , year := "1994-\n1996"]
local_elec_fin[ year %in% c("2018","2019") , year := "2018-\n2019"]

local_elec_fin_l <- as.data.table(melt(local_elec_fin, id.vars=c("lad19nm","year","type")))
local_elec_fin_l[ , year := factor(year)]
local_elec_fin_l[ , variable := gsub("vote_share_","",variable)]


### Merge with predicted social policy estimates
pred_est_sub <- pred_est[ ,.(local.authority.name, type, lad_estimate_comb_norm, lad_estimate_deserving_norm, lad_estimate_redist_norm) ]

pred_est_sub[ type == "bsas 1995" , type := "bsas 1994-6" ]

local_elec_predicted <- merge(local_elec_fin, pred_est_sub, by.x=c("lad19nm", "type"), by.y=c("local.authority.name","type")) 

### Plots
local_elec_predicted_l <- as.data.table(melt(local_elec_predicted[ ,.(year, lad_estimate_comb_norm, lad_estimate_deserving_norm, lad_estimate_redist_norm, 
                                                                      vote_share_Con, vote_share_Lab, vote_share_LD, vote_share_SNP, vote_share_Green)],
                                             id.vars=c("year","lad_estimate_comb_norm","lad_estimate_deserving_norm", "lad_estimate_redist_norm"), value.name = "vote_share", variable.name = "party"))

local_elec_predicted_l[ , party := gsub("vote_share_","",party)]

local_elec_predicted_l[ party == "Con", party := "Conservatives" ]
local_elec_predicted_l[ party == "Lab", party := "Labour" ]
local_elec_predicted_l[ party == "LD", party := "Liberal Democrats" ]
local_elec_predicted_l[,.N, party ]

local_elec_predicted_l[ , party := factor(party, levels=c("Conservatives","Labour","Liberal Democrats","Green","SNP"))]

local_elec_predicted_l[ year == "1994-\n1996" , year := "1994-6" ]
local_elec_predicted_l[ year == "2018-\n2019" , year := "2020" ]


pdf(paste0(plot.dir, "fig11.pdf"), width=7.5, height=5)  
(plot <- (ggplot(local_elec_predicted_l[ party != "SNP" ], aes(x=vote_share, group=1))
          + geom_point(aes(y=lad_estimate_comb_norm, shape="Combined", colour="Combined"), size=0.8, alpha=0.4)
          + geom_point(aes(y=lad_estimate_deserving_norm, shape="Deservingness", colour="Deservingness"), size=0.8, alpha=0.4)
          + geom_point(aes(y=lad_estimate_redist_norm, shape="Redistribution", colour="Redistribution"), size=0.8, alpha=0.4)
          + stat_smooth(aes(y=lad_estimate_comb_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_deserving_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_redist_norm), size=1.4, colour="white", se = F)
          + stat_smooth(aes(y=lad_estimate_comb_norm, colour="Combined"), size=1, se = F)
          + stat_smooth(aes(y=lad_estimate_deserving_norm, colour="Deservingness"), size=1, se = F)
          + stat_smooth(aes(y=lad_estimate_redist_norm, colour="Redistribution"), size=1, se = F)
          + ylab("Social policy support")
          + xlab("Vote share in local election")
          + theme_bw()
          + facet_grid(year ~ party, scales = "free")
          + theme(legend.position="bottom", panel.grid.minor=element_blank(),  
                  panel.border = element_rect(color="gray"))
          + scale_color_brewer(name="Preference dimension", palette="Set1")
          + scale_fill_brewer(name="Preference dimension", palette="Set1")
          + scale_shape_manual(name="Preference dimension", values=c(15:17))
))
dev.off()





#########################################################################
### Figure 12: Labor/Con vote share by socio-economic characteristics ###
#########################################################################

### Bring in covariates
local_elec_fin_sub <- local_elec_fin[ ,.(lad19nm,type,vote_share_Con,vote_share_Lab)]

# Use lookup table to create 1996 LADs
local_elec_1996 <- merge(local_elec_fin_sub[ type == "bsas 1994-6" ], lookup_table, by.x="lad19nm", by.y="lad19nm_target", all.x=T)

local_elec_2020 <- local_elec_fin_sub[ type == "yougov 2020" ]

### Take average to map 1996 LADs to 2020 LADs (account for merges)
local_elec_1996_avg <- local_elec_1996[ , lapply( .SD, function (x) mean(x, na.rm=F) ), by=c("lad19nm"), 
                                        .SDcols=c("vote_share_Con", "vote_share_Lab") ]

local_elec_1996_avg[ , type := "bsas 1994-6" ]

### Combine
local_elec_96_20 <- rbind(local_elec_1996_avg, local_elec_2020)

### Merge with covars
local_elec_96_20[ type == "bsas 1994-6" , year := "1996"]
local_elec_96_20[ type == "yougov 2020" , year := "2020"]

local_elec_96_20[ , year := as.numeric(year)]

d_vote_share_covar <- merge(local_elec_96_20, covariates_comb, by=c("lad19nm","year"), all.y=T)

d_vote_share_covar[ , la_pc1824_pct := la_pc1824 * 100 ]
d_vote_share_covar[ , la_prop_degree_pct := la_prop_degree * 100 ]

d_vote_share_covar_l <- as.data.table(melt(d_vote_share_covar[ ,c("year","lad19nm","vote_share_Con","vote_share_Lab","la_claimants", 
                                                                  "la_houseprice_cpi_log","la_popdensity", "la_pc1824_pct", "la_prop_degree_pct", "la_hhinc_cpi_log"), with=F ], 
                                           id.vars=c("year","lad19nm","la_houseprice_cpi_log","la_popdensity", "la_pc1824_pct", "la_prop_degree_pct", "la_claimants", "la_hhinc_cpi_log")))

d_vote_share_covar_l[ variable == "vote_share_Lab" , variable2 := "Labour"]
d_vote_share_covar_l[ variable == "vote_share_Con" , variable2 := "Conservatives"]
d_vote_share_covar_l[ , variable2 := factor(variable2, levels=c("Labour","Conservatives"))]
d_vote_share_covar_l <- d_vote_share_covar_l[ !is.na(variable2) ]

d_vote_share_covar_l[ year == "1996" , year2 := "1994-6"]
d_vote_share_covar_l[ year == "2020" , year2 := "2020"]

### Plots
(plot1 <- (ggplot(d_vote_share_covar_l[ ], aes(y=value, x=la_hhinc_cpi_log, colour=variable2, fill=variable2, shape=variable2))
           + geom_point(size=0.9, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + ylab("Vote share")
           + xlab("Log household income")
           + theme_bw()
           + facet_wrap(~ year2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Party", palette="Set1")
           + scale_fill_brewer(name="Party", palette="Set1")
           + scale_shape_manual(name="Party", values=c(15:17))
           + ggtitle("Household income")
))

(plot2 <- (ggplot(d_vote_share_covar_l[ ], aes(y=value, x=log(la_claimants), colour=variable2, fill=variable2, shape=variable2))
           + geom_point(size=0.9, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + ylab("Vote share")
           + xlab("Log unemployment rate")
           + theme_bw()
           + facet_wrap(~ year2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Party", palette="Set1")
           + scale_fill_brewer(name="Party", palette="Set1")
           + scale_shape_manual(name="Party", values=c(15:17))
           + ggtitle("Unemployment")
))


(plot3 <- (ggplot(d_vote_share_covar_l[ ], aes(y=value, x=la_prop_degree_pct, colour=variable2, shape=variable2))
           + geom_point(size=0.9, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + ylab("Vote share")
           + xlab("Share people with degree")
           + theme_bw()
           + facet_wrap(~ year2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Party", palette="Set1")
           + scale_fill_brewer(name="Party", palette="Set1")
           + scale_shape_manual(name="Party", values=c(15:17))
           + ggtitle("Education")
))

(plot4 <- (ggplot(d_vote_share_covar_l[ ], aes(y=value, x=la_pc1824_pct, colour=variable2, shape=variable2))
           + geom_point(size=0.9, alpha=0.4)
           + stat_smooth(size=1.4, colour="white", se=F)
           + stat_smooth(size=1, se=F)
           + ylab("Vote share")
           + xlab("Share people 18-24yo")
           + theme_bw()
           + facet_wrap(~ year2)
           + theme(legend.position="right", panel.grid.minor=element_blank(), 
                   panel.border = element_rect(color="gray"))
           + scale_color_brewer(name="Party", palette="Set1")
           + scale_fill_brewer(name="Party", palette="Set1")
           + scale_shape_manual(name="Party", values=c(15:17))
           + ggtitle("Age profile")
))

pdf(paste0(plot.dir, "fig12.pdf"),  width=9, height=6.5)  
ggpubr::ggarrange(plot1,plot2,plot3,plot4, ncol=2, nrow=2, common.legend = TRUE, legend="bottom")
dev.off()



###############################################################
## APPENDIX PART A. COMPARISON OF YOUGOV TO 2020 BSAS SURVEY ##
###############################################################

bsa20 <- read.dta13(paste0(data.dir,"bsa20.dta"))

bsa20$redist <- NA
bsa20$redist[which(as.numeric(bsa20$Redistrb)==2)] <- 5
bsa20$redist[which(as.numeric(bsa20$Redistrb)==3)] <- 4
bsa20$redist[which(as.numeric(bsa20$Redistrb)==4)] <- 3
bsa20$redist[which(as.numeric(bsa20$Redistrb)==5)] <- 2
bsa20$redist[which(as.numeric(bsa20$Redistrb)==6)] <- 1

# Shd spend more on bens 
bsa20$spendmore <- rep(NA, length(bsa20$morewelf))
bsa20$spendmore[which(as.numeric(bsa20$morewelf)==2)] <- 5
bsa20$spendmore[which(as.numeric(bsa20$morewelf)==3)] <- 4
bsa20$spendmore[which(as.numeric(bsa20$morewelf)==4)] <- 3
bsa20$spendmore[which(as.numeric(bsa20$morewelf)==5)] <- 2
bsa20$spendmore[which(as.numeric(bsa20$morewelf)==6)] <- 1

# people on bens don't really deserve help 
bsa20$sochelp <- NA
bsa20$sochelp[which(as.numeric(bsa20$SocHelp)==6)] <- 5
bsa20$sochelp[which(as.numeric(bsa20$SocHelp)==5)] <- 4
bsa20$sochelp[which(as.numeric(bsa20$SocHelp)==4)] <- 3
bsa20$sochelp[which(as.numeric(bsa20$SocHelp)==3)] <- 2
bsa20$sochelp[which(as.numeric(bsa20$SocHelp)==2)] <- 1

# prevent standing on own two feet 
bsa20$feet <- NA
bsa20$feet[which(as.numeric(bsa20$WelfFeet)==6)] <- 5
bsa20$feet[which(as.numeric(bsa20$WelfFeet)==5)] <- 4
bsa20$feet[which(as.numeric(bsa20$WelfFeet)==4)] <- 3
bsa20$feet[which(as.numeric(bsa20$WelfFeet)==3)] <- 2
bsa20$feet[which(as.numeric(bsa20$WelfFeet)==2)] <- 1

# fiddling one way or another 
bsa20$fiddle <- NA
bsa20$fiddle[which(as.numeric(bsa20$DoleFidl)==6)] <- 5
bsa20$fiddle[which(as.numeric(bsa20$DoleFidl)==5)] <- 4
bsa20$fiddle[which(as.numeric(bsa20$DoleFidl)==4)] <- 3
bsa20$fiddle[which(as.numeric(bsa20$DoleFidl)==3)] <- 2
bsa20$fiddle[which(as.numeric(bsa20$DoleFidl)==2)] <- 1

bsa20 <- bsa20 %>% filter(complete.cases(feet,fiddle,sochelp,redist,spendmore))
bsa20$combo <- (bsa20$feet + bsa20$fiddle + bsa20$sochelp + bsa20$redist + bsa20$spendmore)/5
bsa20$deserve <- (bsa20$feet + bsa20$fiddle + bsa20$sochelp)/3
bsa20$redist_sc <- (bsa20$redist + bsa20$spendmore)/2

confint <- function(x){
  myvals <- c( mean(x) - 1.96*(sd(x)/sqrt(length(x))),
               mean(x) + 1.96*(sd(x)/sqrt(length(x))))
  return(myvals)
}

dataset <- as.data.frame(rbind(
  c("Redistribution",mean(bsa20$redist_sc),confint(bsa20$redist_sc)),
  c("Redistribution",mean(yougov$redist),confint(yougov$redist)),
  c("Deservingness",mean(bsa20$deserve),confint(bsa20$deserve)),
  c("Deservingness",mean(yougov$deserve),confint(yougov$deserve)),
  c("Combined",mean(bsa20$combo),confint(bsa20$combo)),
  c("Combined",mean(yougov$combo),confint(yougov$combo))
))

colnames(dataset)=c("variable","diff","lower","upper")
dataset$diff <- as.numeric(as.character(dataset$diff)) 
dataset$lower <- as.numeric(as.character(dataset$lower)) 
dataset$upper <- as.numeric(as.character(dataset$upper)) 
dataset$comparison <- c(rep(3,2),rep(2,2),rep(1,2))
dataset$varnames <- rev(dataset$variable)
dataset$Source <- rep(c("BSAS","YouGov"),3)

pdf(file=paste0(plot.dir,"figA1.pdf"),width=5, height=3.5)
ggplot(data = dataset, aes(x = factor(comparison), y = diff, ymin = lower, 
                           ymax = upper,colour = Source, shape=Source)) +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0) +
  geom_point(position = position_dodge(width = 0.5), size=3) +
  coord_flip() +
  scale_x_discrete(name="",labels=unique(dataset$varnames)) +
  scale_y_continuous(name="Mean Support",limits=c(2.9,3.6)) +
  theme(axis.text.y = element_text(hjust=0)) +  
  theme_bw() +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank(), panel.border = element_rect(color="gray"), panel.grid.minor = element_blank()) 
dev.off()


dataset2 <- as.data.frame(rbind(
  c("Redistrb",mean(bsa20$redist),confint(bsa20$redist)),
  c("Redistrb",mean(yougov$redistrb),confint(yougov$redistrb)),
  c("Welffeet",mean(bsa20$feet),confint(bsa20$feet)),
  c("Welffeet",mean(yougov$welffeet),confint(yougov$welffeet)),
  c("Dolefidl",mean(bsa20$fiddle),confint(bsa20$fiddle)),
  c("Dolefidl",mean(yougov$dolefidl),confint(yougov$dolefidl)),
  c("Sochelp",mean(bsa20$sochelp),confint(bsa20$sochelp)),
  c("Sochelp",mean(yougov$sochelp),confint(yougov$sochelp)),
  c("Morewelf",mean(bsa20$spendmore),confint(bsa20$spendmore)),
  c("Morewelf",mean(yougov$morewelf),confint(yougov$morewelf))
))

colnames(dataset2)=c("variable","diff","lower","upper")
dataset2$diff <- as.numeric(as.character(dataset2$diff)) 
dataset2$lower <- as.numeric(as.character(dataset2$lower)) 
dataset2$upper <- as.numeric(as.character(dataset2$upper)) 
dataset2$comparison <- c(rep(5,2),rep(4,2),rep(3,2),rep(2,2),rep(1,2))
dataset2$varnames <- rev(dataset2$variable)
dataset2$Source <- rep(c("BSAS","YouGov"),5)

pdf(file=paste0(plot.dir,"figA2.pdf"), width=5, height=3.5)
ggplot(data = dataset2, aes(x = factor(comparison), y = diff, ymin = lower, ymax = upper,colour = Source, shape=Source)) +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0) +
  geom_point(position = position_dodge(width = 0.5), size=3) +
  coord_flip() +
  scale_x_discrete(name="",labels=unique(dataset2$varnames)) +
  scale_y_continuous(name="Mean Support",limits=c(2.9,3.6)) +
  theme(axis.text.y = element_text(hjust=0)) +  
  geom_hline(aes(yintercept=0),size=0.5) +
  theme_bw() +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank(), panel.border = element_rect(color="gray"), panel.grid.minor = element_blank()) 
dev.off()

# subgroups, 2020
bsa20$degree <- ifelse(bsa20$HEdQual2=="Degree or equivalent, and above",1,0)
bsa20$secondary <- ifelse(bsa20$HEdQual2=="No qualifications"|bsa20$HEdQual2=="Qualifications below A levels, such as GCSE/O Level/Standard Grade, vocational level 3 or equivalent"|
                             bsa20$HEdQual2=="A levels/SCE Highers including vocational level 3 or equivalent, and above",1,0)
bsa20$eighteen <- ifelse(bsa20$Ragecat=="18-24",1,0)
bsa20$sixtyfive <- ifelse(bsa20$Ragecat=="65+",1,0)

confint <- function(x,var){
  myvals <- c( mean(x[which(var==1)]) - 1.96*(sd(x[which(var==1)])/sqrt(length(x[which(var==1)]))),
               mean(x[which(var==1)]) + 1.96*(sd(x[which(var==1)])/sqrt(length(x[which(var==1)]))))
  return(c(mean(x[which(var==1)]),myvals))
}

dataset_2020 <- as.data.frame(rbind(
  c("Secondary\nEducation",confint(bsa20$combo,bsa20$secondary)),
  c("Secondary\nEducation",confint(yougov$combo,yougov$secondary)),
  c("University\nDegree",confint(bsa20$combo,bsa20$degree)),
  c("University\nDegree",confint(yougov$combo,yougov$degree)),
  c("Aged\n18-24",confint(bsa20$combo,bsa20$eighteen)),
  c("Aged\n18-24",confint(yougov$combo,yougov$eighteen)),
  c("Aged\n65+",confint(bsa20$combo,bsa20$sixtyfive)),
  c("Aged\n65+",confint(yougov$combo,yougov$sixtyfive)),
  
  c("Secondary\nEducation",confint(bsa20$deserve,bsa20$secondary)),
  c("Secondary\nEducation",confint(yougov$deserve,yougov$secondary)),
  c("University\nDegree",confint(bsa20$deserve,bsa20$degree)),
  c("University\nDegree",confint(yougov$deserve,yougov$degree)),
  c("Aged\n18-24",confint(bsa20$deserve,bsa20$eighteen)),
  c("Aged\n18-24",confint(yougov$deserve,yougov$eighteen)),
  c("Aged\n65+",confint(bsa20$deserve,bsa20$sixtyfive)),
  c("Aged\n65+",confint(yougov$deserve,yougov$sixtyfive)),
  
  c("Secondary\nEducation",confint(bsa20$redist,bsa20$secondary)),
  c("Secondary\nEducation",confint(yougov$redist,yougov$secondary)),
  c("University\nDegree",confint(bsa20$redist,bsa20$degree)),
  c("University\nDegree",confint(yougov$redist,yougov$degree)),
  c("Aged\n18-24",confint(bsa20$redist,bsa20$eighteen)),
  c("Aged\n18-24",confint(yougov$redist,yougov$eighteen)),
  c("Aged\n65+",confint(bsa20$redist,bsa20$sixtyfive)),
  c("Aged\n65+",confint(yougov$redist,yougov$sixtyfive))
))


colnames(dataset_2020)=c("variable","mean","lower","upper")
dataset_2020$mean <- as.numeric(as.character(dataset_2020$mean)) 
dataset_2020$lower <- as.numeric(as.character(dataset_2020$lower)) 
dataset_2020$upper <- as.numeric(as.character(dataset_2020$upper)) 
dataset_2020$var <- rep(c(rep(4,2),rep(3,2),rep(2,2),rep(1,2)),3)
dataset_2020$varnames <- rev(dataset_2020$variable)
dataset_2020$Source <- rep(c("BSAS 2020","YouGov 2020"),12)
dataset_2020$question <- c(rep("Combined",8),rep("Deservingness",8),rep("Redistribution",8))

pdf(file=paste0(plot.dir,"figA3.pdf"),width=9,height=4)
ggplot(data = dataset_2020, aes(x = factor(var), y = mean, ymin = lower, 
                                ymax = upper,colour = Source, shape=Source)) +
  facet_wrap(~question) +
  geom_point(position = position_dodge(width = 0.5),size=2.5) +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0.2, size=0.2) +
  coord_flip() +
  theme_bw() +
  scale_x_discrete(name="",labels=unique(dataset_2020$varnames)) +
  scale_y_continuous(name="Mean Support",limits=c(2.75,3.75)) +
  theme(axis.text.y = element_text(hjust=0)) +  
  geom_hline(aes(yintercept=0),size=0.5) +
  theme(axis.text.x=element_text(size=9)) +
  theme(axis.title.x=element_text(size=9)) +
  theme(axis.text.y=element_text(size=9)) +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank()) +
  theme(plot.margin=unit(c(0.5,1,0.5,0.5),"cm")) +
  theme(panel.spacing.x = unit(2, "lines"))
dev.off()

#####################################################################
## APPENDIX PART C. COMPARISON OF BAYESIAN AND NON-BAYESIAN ESTIMATES
#####################################################################

mlmod_bsas_combo <- lmer(combo ~ (1|female) + (1|age_cat) + (1|lad_past_nm_96) + (1|rgn19nm_no_1) +
                     la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + la_gva + la_houseprice +
                     la_socbensrd  + la_popdensity + la_degree ,data=bsas)

load(paste0(data.dir,"post96.Rda")) #post-stratification data for local authorities, 1996
post96$prediction <- predict(mlmod_bsas_combo,newdata=post96,type="response",allow.new.levels=TRUE)
post96$weight.pred <- post96$prediction*post96$value  
pred_bsas_ml_combo <- data.table(post96)[ , .(final.est = sum(weight.pred)), by = .(lad_past_nm_96)]
pred_bsas_ml_combo <- pred_bsas_ml_combo %>% arrange(lad_past_nm_96)

pred_bsas_combo_repl$final.est.ml <- pred_bsas_ml_combo$final.est

pdf(paste0(plot.dir,"figC1.pdf"), width=6, height=5.5)
ggplot(aes(y=final.est,x=final.est.ml),data=pred_bsas_combo_repl)+
  geom_point(alpha=0.75) +
  theme_bw() +
  ylab("Bayesian Estimates") + 
  xlab("Non-Bayesian Estimates") 
dev.off()


mlmod_yougov_combo <- lmer(combo ~ (1|female) + (1|age_cat) + (1|lad19nm) + (1|rgn19nm) +
                       la_claimants +  la_pc65plus + la_pc1824 + la_hhinc + la_gva + la_houseprice +
                       la_socbensrd  + la_popdensity + la_degree ,data=yougov)

load(paste0(data.dir,"post20.Rda"))
post20$prediction <- predict(mlmod_yougov_combo,newdata=post20,type="response",allow.new.levels=TRUE)
post20$weight.pred <- post20$prediction*post20$value  
pred_yougov_ml_combo <- data.table(post20)[ , .(final.est = sum(weight.pred)), by = .(lad19nm)]
pred_yougov_ml_combo <- pred_yougov_ml_combo %>% arrange(lad19nm)

pred_yougov_combo_repl$final.est.ml <- pred_yougov_ml_combo$final.est

pdf(paste0(plot.dir,"figC2.pdf"), width=6, height=5.5)
ggplot(aes(y=final.est,x=final.est.ml),data=pred_yougov_combo_repl)+
  geom_point(alpha=0.75) +
  theme_bw() +
  ylab("Bayesian Estimates") + 
  xlab("Non-Bayesian Estimates") 
dev.off()



#################################################
## APPENDIX PART D. UNCERTAINTY OF MRP ESTIMATES
#################################################

### By type
pred_est_all1 <- pred_est[ , .(local.authority.name, year, lad_estimate_comb, lowerq.comb, upperq.comb) ]
pred_est_all2 <- pred_est[ , .(local.authority.name, year, lad_estimate_redist, lowerq.redist, upperq.redist) ]
pred_est_all3 <- pred_est[ , .(local.authority.name, year, lad_estimate_deserving, lowerq.deserving, upperq.deserving) ]

names(pred_est_all1) <- c("lad","year","coef","lower","upper")
names(pred_est_all2) <- c("lad","year","coef","lower","upper")
names(pred_est_all3) <- c("lad","year","coef","lower","upper")

pred_est_all1[ , type := "Combined support" ]
pred_est_all2[ , type := "Redistribution" ]
pred_est_all3[ , type := "Deservingness" ]

# Plots
(plot_96_1 <- (ggplot(pred_est_all1[ year == "1996" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))

(plot_96_2 <- (ggplot(pred_est_all2[ year == "1996" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("Average LAD social policy support")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))

(plot_96_3 <- (ggplot(pred_est_all3[ year == "1996" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))


### -----------
### SI Figure D1 (top)
### -----------

pdf(paste0(plot.dir, "figD1_top.pdf"), width=10, height=3.5) 
grid.arrange(plot_96_1,plot_96_2,plot_96_3, nrow=1, top="1994-6")
dev.off()


### 2020
(plot_20_1 <- (ggplot(pred_est_all1[ year == "2020" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))

(plot_20_2 <- (ggplot(pred_est_all2[ year == "2020" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("Average LAD social policy support")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))


(plot_20_3 <- (ggplot(pred_est_all3[ year == "2020" ], aes(y=coef, x=reorder(lad,coef)))
               + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, color="gray")
               + geom_point(size=0.7)
               + theme_bw()
               + ylab("")
               + xlab("")
               + coord_flip()
               + facet_wrap(~ type)
               + scale_y_continuous(limits=c(2.6,4))
               + theme(legend.position="bottom", panel.grid.minor=element_blank(), panel.grid.major=element_blank(), 
                       axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                       panel.border = element_rect(color="gray"))
))


### -----------
### SI Figure D1 (bottom)
### -----------

pdf(paste0(plot.dir, "figD1_bot.pdf"), width=10, height=3.5) 
grid.arrange(plot_20_1,plot_20_2,plot_20_3, nrow=1, top="2020")
dev.off()


### Averages
pred_est1 <- pred_est[ , lapply( .SD, function (x) mean(x, na.rm=T) ), by=c("year"), .SDcols=c("lad_estimate_deserving", "lowerq.deserving", "upperq.deserving") ]
pred_est2 <- pred_est[ , lapply( .SD, function (x) mean(x, na.rm=T) ), by=c("year"), .SDcols=c("lad_estimate_redist", "lowerq.redist", "upperq.redist") ]
pred_est3 <- pred_est[ , lapply( .SD, function (x) mean(x, na.rm=T) ), by=c("year"), .SDcols=c("lad_estimate_comb", "lowerq.comb", "upperq.comb") ]

names(pred_est1) <- c("year","coef","lower","upper")
names(pred_est2) <- c("year","coef","lower","upper")
names(pred_est3) <- c("year","coef","lower","upper")

pred_est1[ , type := "Deservingness" ]
pred_est2[ , type := "Redistribution" ]
pred_est3[ , type := "Combined support" ]

pred_est_ci <- rbind(pred_est1,pred_est2,pred_est3)
pred_est_ci[ , year := factor(year) ]

pred_est_ci[ year == 1996 & type == "Deservingness", x2 := 1 ]
pred_est_ci[ year == 1996 & type == "Redistribution", x2 := 1.1 ]
pred_est_ci[ year == 1996 & type == "Combined support", x2 := 1.2 ]

pred_est_ci[ year == 2020 & type == "Deservingness", x2 := 1.7 ]
pred_est_ci[ year == 2020 & type == "Redistribution", x2 := 1.8 ]
pred_est_ci[ year == 2020 & type == "Combined support", x2 := 1.9 ]


### -----------
### SI Figure D2
### -----------

pdf(paste0(plot.dir, "figD2.pdf"), width=5.5, height=3) 
(plot <- (ggplot(pred_est_ci, aes(y=coef, x=x2, colour=type, shape=type))
          + geom_errorbar(aes(ymin=lower, ymax=upper), width=0, size=0.8)
          + geom_point(size=3)
          + theme_bw()
          + ylab("Average LAD social policy support")
          + xlab("Year")
          + coord_flip()
          + scale_x_continuous(limits=c(0.8,2.1), breaks=c(1.1,1.8), labels=c("1994-6","2020"))
          + scale_y_continuous(breaks=seq(3,3.6,0.2))
          + theme(legend.position="right", panel.grid.minor=element_blank(), panel.grid.major.y = element_blank(), 
                  panel.border = element_rect(color="gray"))
          + scale_color_brewer(name="Preference\ndimension", palette="Set1")
          + scale_fill_brewer(name="Preference\ndimension", palette="Set1")
          + scale_shape_manual(name="Preference\ndimension", values=c(15:17))
))
dev.off()



#####################################################
## APPENDIX PART E. MAPS OF SOCIAL POLICY PREFERENCES
#####################################################

### Maps for 1994-6 data

### -----------
### SI Figure E1 a
### -----------

(plot_bsas1 <- (ggplot()
                + geom_sf(data = shp_98_ENG, mapping=aes(fill=lad_estimate_comb_norm), colour="NA")  
                + geom_sf(data = shp_98_SCOT, mapping=aes(fill=lad_estimate_comb_norm), colour="NA") 
                + coord_sf()
                + theme_minimal()
                + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                        axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
                        axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                        legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
                +   scale_fill_viridis(name="Combined welfare support 1994-6", limits = c(0,1), breaks=seq(0,1,0.25),
                                       guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                                            label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
))

(plot_bsas2 <- (ggplot()
                + geom_sf(data = shp_98_ENG, mapping=aes(fill=lad_estimate_redist_norm), colour="NA")  
                + geom_sf(data = shp_98_SCOT, mapping=aes(fill=lad_estimate_redist_norm), colour="NA")  
                + coord_sf()
                + theme_minimal()
                + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                        axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
                        axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                        legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
                +   scale_fill_viridis(name="Redistribution 1994-6", limits = c(0,1), breaks=seq(0,1,0.25),
                                       guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                                            label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
))

(plot_bsas3 <- (ggplot()
                + geom_sf(data = shp_98_ENG, mapping=aes(fill=lad_estimate_deserving_norm), colour="NA")  
                + geom_sf(data = shp_98_SCOT, mapping=aes(fill=lad_estimate_deserving_norm), colour="NA")  
                + coord_sf()
                + theme_minimal()
                + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                        axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
                        axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
                        legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
                +   scale_fill_viridis(name="Deservingness 1994-6", limits = c(0,1), breaks=seq(0,1,0.25),
                                       guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                                            label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
))


pdf(paste0(plot.dir, "figE1a.pdf"), width=10, height=7) 
grid.arrange(plot_bsas1,plot_bsas2,plot_bsas3, nrow=1)
dev.off()



### Maps for 2020 data

### -----------
### SI Figure E1 b
### -----------

(plot_yougov1 <- ggplot()
 + geom_sf(data = lad_shp, mapping=aes(fill=lad_estimate_comb_norm), colour="NA") 
 + coord_sf()
 + theme_minimal()
 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
         axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
         axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
         legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
 +   scale_fill_viridis(name="Combined welfare support 2020", limits = c(0,1), breaks=seq(0,1,0.25),
                        guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                             label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
)

(plot_yougov2 <- ggplot()
  + geom_sf(data = lad_shp, mapping=aes(fill=lad_estimate_redist_norm), colour="NA")  
  + coord_sf()
  + theme_minimal()
  + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
          axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
          legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
  +   scale_fill_viridis(name="Redistribution 2020", limits = c(0,1), breaks=seq(0,1,0.25),
                         guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                              label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
)

(plot_yougov3 <- ggplot()
  + geom_sf(data = lad_shp, mapping=aes(fill=lad_estimate_deserving_norm), colour="NA")  
  + coord_sf()
  + theme_minimal()
  + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
          axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
          legend.position = "bottom", legend.text = element_text(size=8), legend.key.size = unit(0.5, "cm"))
  +   scale_fill_viridis(name="Deservingness 2020", limits = c(0,1), breaks=seq(0,1,0.25),
                         guide = guide_legend(keyheight = unit(3, units = "mm"), keywidth=unit(12, units = "mm"),
                                              label.position = "bottom", title.position = 'top', nrow=1), option="inferno")
)

pdf(paste0(plot.dir, "figE1b.pdf"), width=10, height=7)  
grid.arrange(plot_yougov1,plot_yougov2,plot_yougov3, nrow=1)
dev.off()






